{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import pandas as pd\n",
    "import matplotlib.pyplot as plt"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Exercise 1: Load and examine superstore sales data from an Excel file"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load the workbook from the notebook's working directory.\n",
    "# sheet_name=0 (the first sheet) is pandas' default, spelled out here for clarity.\n",
    "df = pd.read_excel(io=\"Sample - Superstore.xls\", sheet_name=0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Row ID</th>\n",
       "      <th>Order ID</th>\n",
       "      <th>Order Date</th>\n",
       "      <th>Ship Date</th>\n",
       "      <th>Ship Mode</th>\n",
       "      <th>Customer ID</th>\n",
       "      <th>Customer Name</th>\n",
       "      <th>Segment</th>\n",
       "      <th>Country</th>\n",
       "      <th>City</th>\n",
       "      <th>...</th>\n",
       "      <th>Postal Code</th>\n",
       "      <th>Region</th>\n",
       "      <th>Product ID</th>\n",
       "      <th>Category</th>\n",
       "      <th>Sub-Category</th>\n",
       "      <th>Product Name</th>\n",
       "      <th>Sales</th>\n",
       "      <th>Quantity</th>\n",
       "      <th>Discount</th>\n",
       "      <th>Profit</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>CA-2016-152156</td>\n",
       "      <td>2016-11-08</td>\n",
       "      <td>2016-11-11</td>\n",
       "      <td>Second Class</td>\n",
       "      <td>CG-12520</td>\n",
       "      <td>Claire Gute</td>\n",
       "      <td>Consumer</td>\n",
       "      <td>United States</td>\n",
       "      <td>Henderson</td>\n",
       "      <td>...</td>\n",
       "      <td>42420</td>\n",
       "      <td>South</td>\n",
       "      <td>FUR-BO-10001798</td>\n",
       "      <td>Furniture</td>\n",
       "      <td>Bookcases</td>\n",
       "      <td>Bush Somerset Collection Bookcase</td>\n",
       "      <td>261.9600</td>\n",
       "      <td>2</td>\n",
       "      <td>0.00</td>\n",
       "      <td>41.9136</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2</td>\n",
       "      <td>CA-2016-152156</td>\n",
       "      <td>2016-11-08</td>\n",
       "      <td>2016-11-11</td>\n",
       "      <td>Second Class</td>\n",
       "      <td>CG-12520</td>\n",
       "      <td>Claire Gute</td>\n",
       "      <td>Consumer</td>\n",
       "      <td>United States</td>\n",
       "      <td>Henderson</td>\n",
       "      <td>...</td>\n",
       "      <td>42420</td>\n",
       "      <td>South</td>\n",
       "      <td>FUR-CH-10000454</td>\n",
       "      <td>Furniture</td>\n",
       "      <td>Chairs</td>\n",
       "      <td>Hon Deluxe Fabric Upholstered Stacking Chairs,...</td>\n",
       "      <td>731.9400</td>\n",
       "      <td>3</td>\n",
       "      <td>0.00</td>\n",
       "      <td>219.5820</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>3</td>\n",
       "      <td>CA-2016-138688</td>\n",
       "      <td>2016-06-12</td>\n",
       "      <td>2016-06-16</td>\n",
       "      <td>Second Class</td>\n",
       "      <td>DV-13045</td>\n",
       "      <td>Darrin Van Huff</td>\n",
       "      <td>Corporate</td>\n",
       "      <td>United States</td>\n",
       "      <td>Los Angeles</td>\n",
       "      <td>...</td>\n",
       "      <td>90036</td>\n",
       "      <td>West</td>\n",
       "      <td>OFF-LA-10000240</td>\n",
       "      <td>Office Supplies</td>\n",
       "      <td>Labels</td>\n",
       "      <td>Self-Adhesive Address Labels for Typewriters b...</td>\n",
       "      <td>14.6200</td>\n",
       "      <td>2</td>\n",
       "      <td>0.00</td>\n",
       "      <td>6.8714</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>4</td>\n",
       "      <td>US-2015-108966</td>\n",
       "      <td>2015-10-11</td>\n",
       "      <td>2015-10-18</td>\n",
       "      <td>Standard Class</td>\n",
       "      <td>SO-20335</td>\n",
       "      <td>Sean O'Donnell</td>\n",
       "      <td>Consumer</td>\n",
       "      <td>United States</td>\n",
       "      <td>Fort Lauderdale</td>\n",
       "      <td>...</td>\n",
       "      <td>33311</td>\n",
       "      <td>South</td>\n",
       "      <td>FUR-TA-10000577</td>\n",
       "      <td>Furniture</td>\n",
       "      <td>Tables</td>\n",
       "      <td>Bretford CR4500 Series Slim Rectangular Table</td>\n",
       "      <td>957.5775</td>\n",
       "      <td>5</td>\n",
       "      <td>0.45</td>\n",
       "      <td>-383.0310</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>5</td>\n",
       "      <td>US-2015-108966</td>\n",
       "      <td>2015-10-11</td>\n",
       "      <td>2015-10-18</td>\n",
       "      <td>Standard Class</td>\n",
       "      <td>SO-20335</td>\n",
       "      <td>Sean O'Donnell</td>\n",
       "      <td>Consumer</td>\n",
       "      <td>United States</td>\n",
       "      <td>Fort Lauderdale</td>\n",
       "      <td>...</td>\n",
       "      <td>33311</td>\n",
       "      <td>South</td>\n",
       "      <td>OFF-ST-10000760</td>\n",
       "      <td>Office Supplies</td>\n",
       "      <td>Storage</td>\n",
       "      <td>Eldon Fold 'N Roll Cart System</td>\n",
       "      <td>22.3680</td>\n",
       "      <td>2</td>\n",
       "      <td>0.20</td>\n",
       "      <td>2.5164</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>6</td>\n",
       "      <td>CA-2014-115812</td>\n",
       "      <td>2014-06-09</td>\n",
       "      <td>2014-06-14</td>\n",
       "      <td>Standard Class</td>\n",
       "      <td>BH-11710</td>\n",
       "      <td>Brosina Hoffman</td>\n",
       "      <td>Consumer</td>\n",
       "      <td>United States</td>\n",
       "      <td>Los Angeles</td>\n",
       "      <td>...</td>\n",
       "      <td>90032</td>\n",
       "      <td>West</td>\n",
       "      <td>FUR-FU-10001487</td>\n",
       "      <td>Furniture</td>\n",
       "      <td>Furnishings</td>\n",
       "      <td>Eldon Expressions Wood and Plastic Desk Access...</td>\n",
       "      <td>48.8600</td>\n",
       "      <td>7</td>\n",
       "      <td>0.00</td>\n",
       "      <td>14.1694</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>7</td>\n",
       "      <td>CA-2014-115812</td>\n",
       "      <td>2014-06-09</td>\n",
       "      <td>2014-06-14</td>\n",
       "      <td>Standard Class</td>\n",
       "      <td>BH-11710</td>\n",
       "      <td>Brosina Hoffman</td>\n",
       "      <td>Consumer</td>\n",
       "      <td>United States</td>\n",
       "      <td>Los Angeles</td>\n",
       "      <td>...</td>\n",
       "      <td>90032</td>\n",
       "      <td>West</td>\n",
       "      <td>OFF-AR-10002833</td>\n",
       "      <td>Office Supplies</td>\n",
       "      <td>Art</td>\n",
       "      <td>Newell 322</td>\n",
       "      <td>7.2800</td>\n",
       "      <td>4</td>\n",
       "      <td>0.00</td>\n",
       "      <td>1.9656</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>8</td>\n",
       "      <td>CA-2014-115812</td>\n",
       "      <td>2014-06-09</td>\n",
       "      <td>2014-06-14</td>\n",
       "      <td>Standard Class</td>\n",
       "      <td>BH-11710</td>\n",
       "      <td>Brosina Hoffman</td>\n",
       "      <td>Consumer</td>\n",
       "      <td>United States</td>\n",
       "      <td>Los Angeles</td>\n",
       "      <td>...</td>\n",
       "      <td>90032</td>\n",
       "      <td>West</td>\n",
       "      <td>TEC-PH-10002275</td>\n",
       "      <td>Technology</td>\n",
       "      <td>Phones</td>\n",
       "      <td>Mitel 5320 IP Phone VoIP phone</td>\n",
       "      <td>907.1520</td>\n",
       "      <td>6</td>\n",
       "      <td>0.20</td>\n",
       "      <td>90.7152</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>9</td>\n",
       "      <td>CA-2014-115812</td>\n",
       "      <td>2014-06-09</td>\n",
       "      <td>2014-06-14</td>\n",
       "      <td>Standard Class</td>\n",
       "      <td>BH-11710</td>\n",
       "      <td>Brosina Hoffman</td>\n",
       "      <td>Consumer</td>\n",
       "      <td>United States</td>\n",
       "      <td>Los Angeles</td>\n",
       "      <td>...</td>\n",
       "      <td>90032</td>\n",
       "      <td>West</td>\n",
       "      <td>OFF-BI-10003910</td>\n",
       "      <td>Office Supplies</td>\n",
       "      <td>Binders</td>\n",
       "      <td>DXL Angle-View Binders with Locking Rings by S...</td>\n",
       "      <td>18.5040</td>\n",
       "      <td>3</td>\n",
       "      <td>0.20</td>\n",
       "      <td>5.7825</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>10</td>\n",
       "      <td>CA-2014-115812</td>\n",
       "      <td>2014-06-09</td>\n",
       "      <td>2014-06-14</td>\n",
       "      <td>Standard Class</td>\n",
       "      <td>BH-11710</td>\n",
       "      <td>Brosina Hoffman</td>\n",
       "      <td>Consumer</td>\n",
       "      <td>United States</td>\n",
       "      <td>Los Angeles</td>\n",
       "      <td>...</td>\n",
       "      <td>90032</td>\n",
       "      <td>West</td>\n",
       "      <td>OFF-AP-10002892</td>\n",
       "      <td>Office Supplies</td>\n",
       "      <td>Appliances</td>\n",
       "      <td>Belkin F5C206VTEL 6 Outlet Surge</td>\n",
       "      <td>114.9000</td>\n",
       "      <td>5</td>\n",
       "      <td>0.00</td>\n",
       "      <td>34.4700</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>10 rows × 21 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "   Row ID        Order ID Order Date  Ship Date       Ship Mode Customer ID  \\\n",
       "0       1  CA-2016-152156 2016-11-08 2016-11-11    Second Class    CG-12520   \n",
       "1       2  CA-2016-152156 2016-11-08 2016-11-11    Second Class    CG-12520   \n",
       "2       3  CA-2016-138688 2016-06-12 2016-06-16    Second Class    DV-13045   \n",
       "3       4  US-2015-108966 2015-10-11 2015-10-18  Standard Class    SO-20335   \n",
       "4       5  US-2015-108966 2015-10-11 2015-10-18  Standard Class    SO-20335   \n",
       "5       6  CA-2014-115812 2014-06-09 2014-06-14  Standard Class    BH-11710   \n",
       "6       7  CA-2014-115812 2014-06-09 2014-06-14  Standard Class    BH-11710   \n",
       "7       8  CA-2014-115812 2014-06-09 2014-06-14  Standard Class    BH-11710   \n",
       "8       9  CA-2014-115812 2014-06-09 2014-06-14  Standard Class    BH-11710   \n",
       "9      10  CA-2014-115812 2014-06-09 2014-06-14  Standard Class    BH-11710   \n",
       "\n",
       "     Customer Name    Segment        Country             City  ...  \\\n",
       "0      Claire Gute   Consumer  United States        Henderson  ...   \n",
       "1      Claire Gute   Consumer  United States        Henderson  ...   \n",
       "2  Darrin Van Huff  Corporate  United States      Los Angeles  ...   \n",
       "3   Sean O'Donnell   Consumer  United States  Fort Lauderdale  ...   \n",
       "4   Sean O'Donnell   Consumer  United States  Fort Lauderdale  ...   \n",
       "5  Brosina Hoffman   Consumer  United States      Los Angeles  ...   \n",
       "6  Brosina Hoffman   Consumer  United States      Los Angeles  ...   \n",
       "7  Brosina Hoffman   Consumer  United States      Los Angeles  ...   \n",
       "8  Brosina Hoffman   Consumer  United States      Los Angeles  ...   \n",
       "9  Brosina Hoffman   Consumer  United States      Los Angeles  ...   \n",
       "\n",
       "  Postal Code  Region       Product ID         Category Sub-Category  \\\n",
       "0       42420   South  FUR-BO-10001798        Furniture    Bookcases   \n",
       "1       42420   South  FUR-CH-10000454        Furniture       Chairs   \n",
       "2       90036    West  OFF-LA-10000240  Office Supplies       Labels   \n",
       "3       33311   South  FUR-TA-10000577        Furniture       Tables   \n",
       "4       33311   South  OFF-ST-10000760  Office Supplies      Storage   \n",
       "5       90032    West  FUR-FU-10001487        Furniture  Furnishings   \n",
       "6       90032    West  OFF-AR-10002833  Office Supplies          Art   \n",
       "7       90032    West  TEC-PH-10002275       Technology       Phones   \n",
       "8       90032    West  OFF-BI-10003910  Office Supplies      Binders   \n",
       "9       90032    West  OFF-AP-10002892  Office Supplies   Appliances   \n",
       "\n",
       "                                        Product Name     Sales  Quantity  \\\n",
       "0                  Bush Somerset Collection Bookcase  261.9600         2   \n",
       "1  Hon Deluxe Fabric Upholstered Stacking Chairs,...  731.9400         3   \n",
       "2  Self-Adhesive Address Labels for Typewriters b...   14.6200         2   \n",
       "3      Bretford CR4500 Series Slim Rectangular Table  957.5775         5   \n",
       "4                     Eldon Fold 'N Roll Cart System   22.3680         2   \n",
       "5  Eldon Expressions Wood and Plastic Desk Access...   48.8600         7   \n",
       "6                                         Newell 322    7.2800         4   \n",
       "7                     Mitel 5320 IP Phone VoIP phone  907.1520         6   \n",
       "8  DXL Angle-View Binders with Locking Rings by S...   18.5040         3   \n",
       "9                   Belkin F5C206VTEL 6 Outlet Surge  114.9000         5   \n",
       "\n",
       "   Discount    Profit  \n",
       "0      0.00   41.9136  \n",
       "1      0.00  219.5820  \n",
       "2      0.00    6.8714  \n",
       "3      0.45 -383.0310  \n",
       "4      0.20    2.5164  \n",
       "5      0.00   14.1694  \n",
       "6      0.00    1.9656  \n",
       "7      0.20   90.7152  \n",
       "8      0.20    5.7825  \n",
       "9      0.00   34.4700  \n",
       "\n",
       "[10 rows x 21 columns]"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Display the first ten rows of the loaded sheet.\n",
    "df.head(n=10)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Reassign instead of mutating in place so that re-running this cell is harmless:\n",
    "# errors='ignore' turns the drop into a no-op once 'Row ID' is already gone,\n",
    "# whereas the in-place version raised KeyError on a second run.\n",
    "df = df.drop(columns='Row ID', errors='ignore')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(9994, 20)"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Sanity check after removing the identifier column, then show (rows, columns).\n",
    "assert 'Row ID' not in df.columns, \"'Row ID' should have been dropped\"\n",
    "df.shape"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Exercise 2: Subsetting the DataFrame"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Customer ID</th>\n",
       "      <th>Customer Name</th>\n",
       "      <th>City</th>\n",
       "      <th>Postal Code</th>\n",
       "      <th>Sales</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>BH-11710</td>\n",
       "      <td>Brosina Hoffman</td>\n",
       "      <td>Los Angeles</td>\n",
       "      <td>90032</td>\n",
       "      <td>48.860</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>BH-11710</td>\n",
       "      <td>Brosina Hoffman</td>\n",
       "      <td>Los Angeles</td>\n",
       "      <td>90032</td>\n",
       "      <td>7.280</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>BH-11710</td>\n",
       "      <td>Brosina Hoffman</td>\n",
       "      <td>Los Angeles</td>\n",
       "      <td>90032</td>\n",
       "      <td>907.152</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>BH-11710</td>\n",
       "      <td>Brosina Hoffman</td>\n",
       "      <td>Los Angeles</td>\n",
       "      <td>90032</td>\n",
       "      <td>18.504</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>BH-11710</td>\n",
       "      <td>Brosina Hoffman</td>\n",
       "      <td>Los Angeles</td>\n",
       "      <td>90032</td>\n",
       "      <td>114.900</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  Customer ID    Customer Name         City  Postal Code    Sales\n",
       "5    BH-11710  Brosina Hoffman  Los Angeles        90032   48.860\n",
       "6    BH-11710  Brosina Hoffman  Los Angeles        90032    7.280\n",
       "7    BH-11710  Brosina Hoffman  Los Angeles        90032  907.152\n",
       "8    BH-11710  Brosina Hoffman  Los Angeles        90032   18.504\n",
       "9    BH-11710  Brosina Hoffman  Los Angeles        90032  114.900"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# .loc slices by label and includes both endpoints, so 5:9 selects rows 5 through 9.\n",
    "subset_columns = ['Customer ID', 'Customer Name', 'City', 'Postal Code', 'Sales']\n",
    "df_subset = df.loc[5:9, subset_columns]\n",
    "df_subset"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Exercise 3: An example use case – determining statistics on sales and profit for records 100-199"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Records 100-199: a .loc label slice is inclusive at both ends, giving 100 rows.\n",
    "df_subset = df.loc[100:199, ['Sales', 'Profit']]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Sales</th>\n",
       "      <th>Profit</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>count</th>\n",
       "      <td>100.000000</td>\n",
       "      <td>100.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>mean</th>\n",
       "      <td>262.957220</td>\n",
       "      <td>0.347574</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>std</th>\n",
       "      <td>858.983762</td>\n",
       "      <td>170.744869</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>min</th>\n",
       "      <td>1.788000</td>\n",
       "      <td>-1359.992000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25%</th>\n",
       "      <td>21.327000</td>\n",
       "      <td>1.635900</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>50%</th>\n",
       "      <td>66.960000</td>\n",
       "      <td>9.653600</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>75%</th>\n",
       "      <td>177.095000</td>\n",
       "      <td>23.458800</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>max</th>\n",
       "      <td>8159.952000</td>\n",
       "      <td>585.552000</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "             Sales       Profit\n",
       "count   100.000000   100.000000\n",
       "mean    262.957220     0.347574\n",
       "std     858.983762   170.744869\n",
       "min       1.788000 -1359.992000\n",
       "25%      21.327000     1.635900\n",
       "50%      66.960000     9.653600\n",
       "75%     177.095000    23.458800\n",
       "max    8159.952000   585.552000"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Summary statistics (count, mean, std, min, quartiles, max) for the selected columns.\n",
    "# The quartiles listed are describe()'s defaults, written out explicitly.\n",
    "df_subset.describe(percentiles=[0.25, 0.5, 0.75])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXcAAAEKCAYAAADpfBXhAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy8QZhcZAAAdPklEQVR4nO3dfZxdVX3v8c83mQDBIIhAyk2CwZKrE0YlEhTNiBnyKhrAklqljk8o82raeyXS6jWJTmvA3uEm1xYUfIDgpMQWRyw+gERAGmeo0xYUBSEwCrkaS0IwhIeYZCAk4Xf/2GvCyWEmc85k9pzJzvf9ep3XnL320zr77PmefdZeex9FBGZmVixjal0BMzMbfg53M7MCcribmRWQw93MrIAc7mZmBeRwNzMrIIf7KCXpEklR8uiV9ICk+aOgXpurnOeQNN8pw1iPekk/lrQ9bZ+pw7Xsfta1TtLf57X8kSLpIkmjou+zpPMk9Uh6XtK6VBaSLiqZZr6keTWr5AGurtYVsH3aArwzPX8Z8C7gGknbIuIbtatW1Q4BlgDrgPuGaZmfB44C/hjYDmwcpuVaziSNBb4O3Ar8Odn7B/AW4Dclk84H1gDfG9EKFoTDfXTbFRF3lQyvlvRWYB5wIIV7Hl4L3BwRq2tdEQNJ4yPi2QonPx54OfCNiOjuKyzb120/uVnmwLMVGFdaIOlESd+T9HtJWyV9X9JJJePfK+kFSXNKyqam6dtKhkPS+yX9U1rOJklLBqvQYOtPdQb4x5Jmpqn7WN4pklanpqinJV0vaWJpPYE/BP46LatrH8tqkfSQpGclbZZ0p6STS8YvTc1d2yStT+v6gwpe89vSsnolPSnpWklHlIw/StLXJD0m6TlJ/yXp2kGWeY6kO9J2/72kuySdVTbNJel1zEjjeyXdK+ltZdMdKulLkp6R9JSkKyjbbwaoQ9/yZ0n6ear7fZIay6ZbJ+kfJP2tpPXA70vGnZ+26Q5Jj0pqk1SXxn0EeDRNelN6/y5J4/Y0y6T39FTggpJ95iOD1d9KRIQfo/ABXAJsJvt2VUd2pPNBYBfw4ZLpDgV+DfwK+DPgT8m+ym4Aji6ZrgP4bVqOgE7gF8AhafxUINJ81wDvANqAF4CPldermvUDTWnZfwecnh6HDvC6jwWeAf6T7BvKB4H1wP1kzTuHpvk3Aten59MHWNYZwE7g08Bssiac/wO8tWSaFUAz8HbgPWm9DwFjSqZZB/x9yfAsYAdwA3A28KH0em8sW+4v0zZ5e3odywd5zy8CPp62/R8BlwO7gVll2783bY+PAnOBu4AngMNLprsCeA74ZJrmO2k7RgX7XS9Z88h8sqbALrIP6D8o2yYbgX9N2/Xdqfys9F6vJGtSXJi21dUl7++fpGk+md6/yWlcABel59OBHmBVyT5zbK3/Lw+kR80r4McAb0z2Txb9PL5YNt1fkgX+q0vKJgPPA58uKTsaeAxoTwGyA3hDyfipafk/LFv+tSm4xpTUa3M16wcmpGV/pILXvZQs3F9eUvbmNH9zSdk6SgJ3gGX9L+BnVWzzscCktK4zBloX8GOgs2zeM9N8DWl4DbBgP97/MWQf6rcDK/rZL84sKTsllb0zDb8SeBZYVLa8X1JZuAfw/pKyCcBTwNKybbIROKxs/rv62TYLyT6k+kK8b187t2y6PeGehu8Brhup/7miPdwsM7ptAU5Lj0bgYrKvqaVNJW8Cfh4Rv+4riIj1wL+nefrKniI7eXUh2cnIz0XEL/pZ53fLhr8D/DeywO5PReuvwpvIPmD2fM2PiLvJwqTa5d0HzJB0haQzJB1SPoGkuZL+Q9IWsg+p9WnUf+9vgZIOJzvx9y1JdX0PoJvsW8KpJev+lKT/KanfZfWz7MmSVkrakOqyk+xIuHz+58mOpvs8lP72vUevAw4DbuqbICJeKB2uwJ79ICK2AXeQvTelVkfEcyX1Hwu8
EfiXsuluIPtweUsV67f95HAf3XZFxD3p8e8RcSXwOeAzko5O0xwP/K6feX9HdrRe6kepfAzZEXl/Ng0wfPwA01ez/koM2/Ii4l/Jmi7OIAvDzZK+LOllAJJOA24mC/QPkYXP6Wn2wwZY7CvIjvC/Qha+fY8dZG3aU9J0F5H18vgs8CtJj0h630B1lTQm1eWtaZ4msg/1W/upy9YU1n2v8/myOvedMxjovRzMtnjpydFNvHQfKH+fjiHbBuXlfcND2R9siNxb5sDTQ9b2/IdkX5U3Aif3M93ENL7UUrJgehz4AvD+fuY7boDhgboaVrP+Smzspw59y/tZtQuLiJXASknHAu8ma4veCiwma/t9AvizSO0Akl41yCKfIWs+uAT4QT/jH0vrfYas+evjkl5P1jRxvaT7I+KhfuY7CZgBzI2I2/oKJY2v8KWWejz9PY6934P+tmt/JuilvV+O46X7QHmf+c1kH3Tl65mY/g5lf7Ah8pH7gach/e3rcXA3cKqkE/smkDSJ7Aiwu6RsNrAA+B9AC9As6U/7Wf6flA2/m+yfen0/01a6/vIjy325G3hHWc+T08jaabsHmmkwEfFERFxD1l4+PRWPB3b2BXvygUGWs52sXfk1Jd+qSh+P9TPP/cCnyP7fXjvAovtCfEdfQfqgmVXByyv3ANnJ1PNKljWmdLgCe/YDSRPITvD+ZF8zRMRusg/g95aNOp/sxPx/VrF+yPabSvYZ64eP3Ee3Okl9zQSHkLXn/g1wU0T0HZ1dBywCbpX0WbITV0vIjqKugT3/nCuAGyLixlR2DfBVSf8WEU+UrPPkNO7bZM0ZLcDFpc0AZQZdf0Q8L+k3wPmS1pAFz/0lzQmlLif7ALpd0jKyk3lLyQLr24NvshdJupSsKaAr1WcGWc+VxWmSO4C/kvQF4PtkH0gfrGDRC8muOXgBuJHsm8AJwDlAa0Q8LKmbrN16DdkRbt/FOgMF5C/JPkD/QdLfAkcAl5KdzK5KRDwpaTlwqaRdwINp/RMqXMSzQFvabx4jOzF9CPDFCuZdQvbe/SPwTbL2/78Drk3nYqrxS7IP+ncATwK/iYgnq1zGwavWZ3T96P/BS3vLPA88AiwDjiib9tVk7btbgW3ALcC0kvHXkB19l3aNnEDWhfHbaXhqWs8HyLpNbiVrsrgUUFm9Nlez/jTNWWTd955L65m6j9c+g+z8QC9ZM8g3gIll06xj8N4y5wKr0+t4jqy75uKy17OQ7FvQdrJufdN4aa+Nl6yLrAfPbWT9u7eTndS8HDgyjf882QfS1vQaOoG3DVLf08jC/9n0Xn+E7MPznn1t/1ReXudDyc4LbAGeBq4CPkFlvWU2A28jOym8g6zL7Bll0w24/cm6fz5Ats+uJ+tSW1cyvm9fG6y3zKvTe7KFCntb+fHiQ2kj2kFO2UVFvwHeFRG31LY2VivpgqKLIuKYWtfF9o/b3M3MCqiicE+XGj+QLkO+J5UdnS6VfiT9fUUql6QrJa2VdL+kN+b5AszM7KUqapZRdkvOmRGxuaTs/wJPRcRSSYuBV0TEIklnk/XKOJusXfKLEfHmXGpvZmb92p9mmfPI7h9B+juvpPzrkbkLOErSQBfAmJlZDirtChnAD5Xdje+aiFhO1nuh76KGx3nxQoVJvNgHG7Kz5ZMouwBC2Y9OzAcYP378qVOmTMGGbt26dRx33HEcfvjhvPDCC4wZM4be3l42bdrE1KlTa109M4A9+6YNj4cffnhzRBzb37hKw70xIjZIOg64Q9IvS0dGRKjKX3hJHxDLAWbOnBn33HNPNbNbmY6ODlpbW7n66qvZvXs3Y8eOpaWlheuuu47m5uZaV88MgK6uLmbPnl3rahSGpN8ONK6icI+IDenvJknfJbuB0O8kHR8RG1OzS999Kzbw4v01ILuZUdUXYlh1+gJ8wYIF9PT0UF9fT1tbm4Pd7CA16PcjSS/ruxQ83XDpLLKr7m4GLkiTXcCLd5y7Gfhw6jVzOrCl
pPnGctTc3MyaNWtYvXo1a9ascbCbHcQqOXKfCHxXUt/034iI2yT9lOy2py1kPwJxfpr+B2Q9ZdaSXWH40WGvtZmZ7dOg4R7Zfbrf0E/5k8CcfsoD+Niw1M7MzIbEp63NzArI4W5mVkAOdzOzAnK4m5kVkMPdzKyAHO5mZgXkcDczKyCHu5lZATnczcwKyOFuZlZADvcC6ejooKGhgTlz5tDQ0EBHR0etq2RmNVLp/dxtlOu7n3t7e/te93MHfHdIs4OQj9wLoq2tjfb2dpqamqirq6OpqYn29nba2tpqXTUzqwGHe0H09PTQ2Ni4V1ljYyM9PT01qpGZ1ZLDvSDq6+vp7u7eq6y7u5v6+voa1cjMasnhXhCtra20tLTQ2dnJrl276OzspKWlhdbW1lpXzcxqwCdUC8K/oWpmpRzuBdLc3Exzc7N/Yd7M3CxjZlZEDnczswJyuJuZFZDD3cysgBzuZmYF5HA3Mysgh7uZWQE53M3MCsjhbmZWQA53M7MCcribmRWQw93MrIAc7mZmBeRwNzMrIIe7mVkBOdzNzArIP9ZxgJI0pPkiYphrYmajUcVH7pLGSrpX0i1p+ERJd0taK+kGSYek8kPT8No0fmo+VT+4RcSAj1ctumXAcWZ2cKimWeZioKdkeBlwRUScBDwNtKTyFuDpVH5Fms7MzEZQReEuaTJwDvC1NCzgTODGNMlKYF56fl4aJo2fo6G2IZiZ2ZBU2ub+BWAhcEQafiXwTETsSsPrgUnp+STgUYCI2CVpS5p+c+kCJc0H5gNMnDiRrq6uIb4E64+3p41G27Zt8745QgYNd0nnApsi4meSZg/XiiNiObAcYObMmTF79rAt2m5bhbenjUZdXV3eN0dIJUfus4A/lnQ2cBjwcuCLwFGS6tLR+2RgQ5p+AzAFWC+pDjgSeHLYa25mZgMatM09Ij4dEZMjYirwPuBHEfEBoBN4T5rsAuCm9PzmNEwa/6NwNw0zsxG1PxcxLQI+IWktWZt6eypvB16Zyj8BLN6/KpqZWbWquogpIrqArvT818Cb+pnmOeC9w1A3MzMbIt9+wMysgBzuZmYF5HA3Mysgh7uZWQE53M3MCsjhbmZWQA53M7MCcribmRWQw93MrIAc7mZmBeRwNzMrIIe7mVkBOdzNzArI4W5mVkAOdzOzAnK4m5kVkMPdzKyAHO5mZgXkcDczKyCHu5lZATnczcwKyOFuZlZADnczswJyuJuZFZDD3cysgBzuZmYF5HA3Mysgh7uZWQE53M3MCsjhbmZWQA53M7MCcribmRWQw93MrIAc7mZmBTRouEs6TNJPJP1C0oOSLk3lJ0q6W9JaSTdIOiSVH5qG16bxU/N9CWZmVq6SI/cdwJkR8QbgFOCdkk4HlgFXRMRJwNNAS5q+BXg6lV+RpjMzsxE0aLhHZlsaHJceAZwJ3JjKVwLz0vPz0jBp/BxJGrYam5nZoOoqmUjSWOBnwEnAl4H/BzwTEbvSJOuBSen5JOBRgIjYJWkL8Epgc9ky5wPzASZOnEhXV9d+vRDbm7enjUbbtm3zvjlCKgr3iNgNnCLpKOC7wGv3d8URsRxYDjBz5syYPXv2/i7S+ty2Cm9PG426urq8b46QqnrLRMQzQCfwFuAoSX0fDpOBDen5BmAKQBp/JPDksNTWzMwqUklvmWPTETuSxgN/BPSQhfx70mQXADel5zenYdL4H0VEDGelzcxs3yppljkeWJna3ccA34qIWyQ9BHxT0v8G7gXa0/TtwD9JWgs8Bbwvh3qbmdk+DBruEXE/MKOf8l8Db+qn/DngvcNSOzMzGxJfoWpmVkAOdzOzAnK4m5kVkMPdzKyAHO5mZgXkcDczKyCHu5lZATnczcwKyOFuZlZADnczswJyuJuZFZDD3cysgBzuZmYF5HA3Mysgh7uZWQE53M3MCsjhbmZWQA53M7MCcribmRWQw93MctfR0UFDQwNz5syhoaGBjo6OWlep8Ab9gWwzs/3R0dFB
a2sr7e3t7N69m7Fjx9LS0gJAc3NzjWtXXD5yN7NctbW10d7eTlNTE3V1dTQ1NdHe3k5bW1utq1ZoDnczy1VPTw+NjY17lTU2NtLT01OjGh0cHO5mlqv6+nq6u7v3Kuvu7qa+vr5GNTo4ONzNLFetra20tLTQ2dnJrl276OzspKWlhdbW1lpXrdB8QtXMctV30nTBggX09PRQX19PW1ubT6bmzOFuZrlrbm6mubmZrq4uZs+eXevqHBTcLGNmuXM/95HnI3czy5X7udeGj9zNLFfu514bDnczy5X7udeGw93McuV+7rXhcDezXLmfe234hKqZ5cr93GvD4W5muXM/95E3aLOMpCmSOiU9JOlBSRen8qMl3SHpkfT3Falckq6UtFbS/ZLemPeLMDOzvVXS5r4L+GRETAdOBz4maTqwGFgdEdOA1WkYYC4wLT3mA18d9lqbmdk+DRruEbExIn6enm8FeoBJwHnAyjTZSmBeen4e8PXI3AUcJen4Ya+5mZkNqKo2d0lTgRnA3cDEiNiYRj0OTEzPJwGPlsy2PpVtLClD0nyyI3smTpxIV1dXdTW3ffL2tNFo27Zt3jdHSMXhLmkC8G3gryLi95L2jIuIkBTVrDgilgPLAWbOnBk+yTKMblvlk1Y2KvmE6sipqJ+7pHFkwX59RHwnFf+ur7kl/d2UyjcAU0pmn5zKzMxshFTSW0ZAO9ATEZeXjLoZuCA9vwC4qaT8w6nXzOnAlpLmGzMzGwGVNMvMAj4EPCDpvlT2GWAp8C1JLcBvgfPTuB8AZwNrgV7go8NaYzMzG9Sg4R4R3YAGGD2nn+kD+Nh+1svMzPaD7y1jZlZADnczswJyuJuZFZDD3cysgHxXyFHuDZf+kC3P7qx6vqmLV1U87ZHjx/GLJWdVvQ4zG70c7qPclmd3sm7pOVXNU+1VgNV8EJjZgcHNMmaWu46ODhoaGpgzZw4NDQ10dHTUukqF5yN3M8tVR0cHra2ttLe3s3v3bsaOHUtLSwuAf40pRz5yN7NctbW10d7eTlNTE3V1dTQ1NdHe3k5bW1utq1ZoDnczy1VPTw+NjY17lTU2NtLT01OjGh0cHO5mlqv6+nq6u7v3Kuvu7qa+vr5GNTo4ONzNLFetra20tLTQ2dnJrl276OzspKWlhdbW1lpXrdB8QtXMctV30nTBggX09PRQX19PW1ubT6bmzOFuZrlrbm6mubnZv8Q0gtwsY2ZWQA53M7MCcribmRWQw93MrIAc7mZmBeRwNzMrIIe7mVkBOdzNLHe+5e/I80VMZpYr3/K3Nnzkbma58i1/a8Phbma58i1/a8Phbma58i1/a8Phbma58i1/a8MnVM0sV77lb2043M0sd77l78hzs4yZ5c793Eeej9zNLFfu514bPnI3s1y5n3ttONzNLFfu514bbpYxs1zV19dz6aWX8r3vfW9Pb5l58+a5n3vOBj1yl7RC0iZJa0rKjpZ0h6RH0t9XpHJJulLSWkn3S3pjnpU3s9GvqamJZcuWceGFF7Jq1SouvPBCli1bRlNTU62rVmiVNMtcB7yzrGwxsDoipgGr0zDAXGBaeswHvjo81TSzA1VnZyeLFi1ixYoVnHPOOaxYsYJFixbR2dlZ66oV2qDhHhH/BjxVVnwesDI9XwnMKyn/emTuAo6SdPxwVdbMDjw9PT0sWbKENWvWsHr1atasWcOSJUvc5p6zoba5T4yIjen548DE9HwS8GjJdOtT2UbKSJpPdnTPxIkT6erqGmJViq/abbNt27aq5/H2t7yccMIJfOlLX2LGjBl79s17772XE044wftdjvb7hGpEhKQYwnzLgeUAM2fODF+1NoDbVlV9RV/VVwEOYR1mlbrsssv29HM/7LDDiAiuuuoqLrvsMu93ORpquP9O0vERsTE1u2xK5RuAKSXTTU5lZnaQ8r1lamOo/dxvBi5Izy8Abiop/3DqNXM6sKWk+cbMDlLNzc17tbk72PM36JG7pA5gNnCMpPXA
EmAp8C1JLcBvgfPT5D8AzgbWAr3AR3Oos5mZDWLQcI+IgT5i5/QzbQAf299KmZnZ/vHtB8wsd74r5MhzuJtZrjo6Orj44ovZvn07ANu3b+fiiy92wOfM4W5muVq4cCF1dXWsWLGC22+/nRUrVlBXV8fChQtrXbVCc7ibWa7Wr1/PypUr97rl78qVK1m/fn2tq1ZoDnczswLyLX/NLFeTJ09m3rx57Ny5k507dzJu3DjGjRvH5MmTa121QvORu5nlavr06fT29jJhwgQkMWHCBHp7e5k+fXqtq1ZoDnczy9Wdd97JrFmz6O3tJSLo7e1l1qxZ3HnnnbWuWqG5WcbMcrVjxw42bNjArbfeuucHsi+88EJ27NhR66oVmsN9lDuifjGvW7l48AnLrRx8khfXAXBO9eswq4Ak5s6dS1NT0547ls6dO5err7661lUrNIf7KLe1ZynrllYXvNXe8nfq4lVV1sqschHBtddey0knncT06dO5/PLLufbaa8nuVmJ5cbibWa5OPvlkpk2bxmc+8xl27NjBoYceyrnnnssjjzxS66oVmsPdzHLV2tpKa2vrXm3uLS0ttLW11bpqheZwN7Nc+cc6asPhbma5a25uprm5ufqfgLQhc7ib2bCTVPU8PsE6vHwRk5kNu4jo9/GqRbcMOM6Gl8PdzKyAHO5mZgXkcDczKyCHu5lZATnczcwKyOFuZlZADnczswJyuJuZFZDD3cysgBzuZmYF5HA3Mysg3zjMzIbsDZf+kC3P7qxqnmp/+evI8eP4xZKzqprHHO4HhCH9DN5tlc9z5Phx1S/fDNjy7M6qfgZyKLf89c9ADo3DfZSr9vdTIftnGMp8ZlYcbnM3Mysgh7uZWQE53M3MCsht7mY2ZEfUL+Z1KxdXN9PKatcB4HNI1col3CW9E/giMBb4WkQszWM9ZlZbW3uWurfMKDXszTKSxgJfBuYC04FmSdOHez1mZjawPI7c3wSsjYhfA0j6JnAe8FAO6zpoDfbr8lrWf7l/iNiGW9VH1lVcgwG+DmOo8gj3ScCjJcPrgTeXTyRpPjA/DW6T9Ksc6nKwOgbY3N+IwT4UzHI24L65L7pk+CtSEK8aaETNTqhGxHJgea3WX2SS7omImbWuh1k575sjJ4+ukBuAKSXDk1OZmZmNkDzC/afANEknSjoEeB9wcw7rMTOzAQx7s0xE7JJ0EXA7WVfIFRHx4HCvx/bJzV02WnnfHCFy7wkzs+Lx7QfMzArI4W5mVkAO9wOApFZJD0q6X9J9kl5y3UDJtNdJes9I1s8OXpJ2p31yjaR/kXR4lfO/V1KPpE5JMyVdmcpnS3prPrU+OPjGYaOcpLcA5wJvjIgdko4BDqlxtcz6PBsRpwBIuh74S+DyvpHKrppTRLwwwPwtwJ9HRHcavif9nQ1sA/4jj0ofDHzkPvodD2yOiB0AEbE5Ih6T9FlJP01HTMvVz6Wnkk6VdKekn0m6XdLxqfzjkh5K3wS+OcKvx4rrx8BJkqZK+pWkrwNrgCmSmiU9kPbXZQCSPgs0Au2SPp+O1m+RNJXsQ+Kv07eCt9Xo9RzQHO6j3w/J/jkelvQVSW9P5V+KiNMiogEYT3Z0v4ekccBVwHsi4lRgBdCWRi8GZkTE68n+icz2i6Q6spsFPpCKpgFfiYiTgZ3AMuBM4BTgNEnzIuJzZEfqH4iIT/UtKyLWAVcDV0TEKRHx45F7JcXhcB/lImIbcCrZfXieAG6Q9BGgSdLdkh4g+6c5uWzW1wANwB2S7gP+huxqYYD7geslfRDYlf+rsAIbn/ave4D/AtpT+W8j4q70/DSgKyKeiIhdwPXAGSNf1YOL29wPABGxG+gCulKY/wXwemBmRDwq6RLgsLLZBDwYEW/pZ5HnkP1zvQtolfS69E9nVq09be59Ugvh9tpUx/r4yH2Uk/QaSdNKik4B+u6guVnSBKC/3jG/Ao5NJ2SRNE7SyZLGAFMiohNYBBwJTMjvFZjxE+Dtko5Jv/fQ
DNw5yDxbgSNyr1mB+ch99JsAXCXpKLImlLVkTTTPkJ2sepzsfj57iYjnU5fIKyUdSfZefwF4GPjnVCbgyoh4ZkReiR2UImKjpMVAJ9k+tyoibhpktu8DN0o6D1jgdvfq+fYDZmYF5GYZM7MCcribmRWQw93MrIAc7mZmBeRwNzMrIIe7mVkBOdzNzAro/wN8OB2EHqJuuQAAAABJRU5ErkJggg==\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Keep the Axes returned by the pandas box plot and style it directly\n",
    "ax = df_subset.plot.box()\n",
    "ax.set_title(\"Boxplot of sales and profit\", fontsize=15)\n",
    "ax.set_ylim(0, 500)\n",
    "ax.grid(True)\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Exercise 4: A useful function – unique"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array(['Kentucky', 'California', 'Florida', 'North Carolina',\n",
       "       'Washington', 'Texas', 'Wisconsin', 'Utah', 'Nebraska',\n",
       "       'Pennsylvania', 'Illinois', 'Minnesota', 'Michigan', 'Delaware',\n",
       "       'Indiana', 'New York', 'Arizona', 'Virginia', 'Tennessee',\n",
       "       'Alabama', 'South Carolina', 'Oregon', 'Colorado', 'Iowa', 'Ohio',\n",
       "       'Missouri', 'Oklahoma', 'New Mexico', 'Louisiana', 'Connecticut',\n",
       "       'New Jersey', 'Massachusetts', 'Georgia', 'Nevada', 'Rhode Island',\n",
       "       'Mississippi', 'Arkansas', 'Montana', 'New Hampshire', 'Maryland',\n",
       "       'District of Columbia', 'Kansas', 'Vermont', 'Maine',\n",
       "       'South Dakota', 'Idaho', 'North Dakota', 'Wyoming',\n",
       "       'West Virginia'], dtype=object)"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Distinct values found in the State column, returned as an array\n",
    "df.loc[:, 'State'].unique()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "49"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Count of distinct State values (missing values are not counted)\n",
    "df['State'].nunique(dropna=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array(['United States'], dtype=object)"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Distinct values found in the Country column, returned as an array\n",
    "df.loc[:, 'Country'].unique()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Reassign instead of mutating in place, and tolerate the column already being gone,\n",
    "# so that re-running this cell does not raise a KeyError\n",
    "df = df.drop(columns='Country', errors='ignore')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Exercise 5: Conditional Selection and Boolean Filtering"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Ship Mode</th>\n",
       "      <th>State</th>\n",
       "      <th>Sales</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Second Class</td>\n",
       "      <td>Kentucky</td>\n",
       "      <td>261.9600</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Second Class</td>\n",
       "      <td>Kentucky</td>\n",
       "      <td>731.9400</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Second Class</td>\n",
       "      <td>California</td>\n",
       "      <td>14.6200</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>Standard Class</td>\n",
       "      <td>Florida</td>\n",
       "      <td>957.5775</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>Standard Class</td>\n",
       "      <td>Florida</td>\n",
       "      <td>22.3680</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>Standard Class</td>\n",
       "      <td>California</td>\n",
       "      <td>48.8600</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>Standard Class</td>\n",
       "      <td>California</td>\n",
       "      <td>7.2800</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>Standard Class</td>\n",
       "      <td>California</td>\n",
       "      <td>907.1520</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>Standard Class</td>\n",
       "      <td>California</td>\n",
       "      <td>18.5040</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>Standard Class</td>\n",
       "      <td>California</td>\n",
       "      <td>114.9000</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "        Ship Mode       State     Sales\n",
       "0    Second Class    Kentucky  261.9600\n",
       "1    Second Class    Kentucky  731.9400\n",
       "2    Second Class  California   14.6200\n",
       "3  Standard Class     Florida  957.5775\n",
       "4  Standard Class     Florida   22.3680\n",
       "5  Standard Class  California   48.8600\n",
       "6  Standard Class  California    7.2800\n",
       "7  Standard Class  California  907.1520\n",
       "8  Standard Class  California   18.5040\n",
       "9  Standard Class  California  114.9000"
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Select the rows labelled 0 through 9 and the three columns used in this exercise\n",
    "df_subset = df.loc[\n",
    "    list(range(10)),\n",
    "    ['Ship Mode', 'State', 'Sales'],\n",
    "]\n",
    "df_subset"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Compare only the numeric 'Sales' column: applying > to the whole frame also hits the\n",
    "# string columns ('Ship Mode', 'State') and raised\n",
    "# TypeError: '>' not supported between instances of 'str' and 'int'\n",
    "df_subset['Sales']>100"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [
    {
     "ename": "TypeError",
     "evalue": "'>' not supported between instances of 'str' and 'int'",
     "output_type": "error",
     "traceback": [
      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[0;31mTypeError\u001b[0m                                 Traceback (most recent call last)",
      "\u001b[0;32m<ipython-input-19-9f0174bef77a>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mdf_subset\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mdf_subset\u001b[0m\u001b[0;34m>\u001b[0m\u001b[0;36m100\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
      "\u001b[0;32m~/.local/lib/python3.6/site-packages/pandas/core/ops/__init__.py\u001b[0m in \u001b[0;36mf\u001b[0;34m(self, other)\u001b[0m\n\u001b[1;32m   1577\u001b[0m             \u001b[0;31m# straight boolean comparisons we want to allow all columns\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   1578\u001b[0m             \u001b[0;31m# (regardless of dtype to pass thru) See #4537 for discussion.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1579\u001b[0;31m             \u001b[0mres\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_combine_const\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mother\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfunc\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m   1580\u001b[0m             \u001b[0;32mreturn\u001b[0m \u001b[0mres\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfillna\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;32mTrue\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mastype\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mbool\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   1581\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m~/.local/lib/python3.6/site-packages/pandas/core/frame.py\u001b[0m in \u001b[0;36m_combine_const\u001b[0;34m(self, other, func)\u001b[0m\n\u001b[1;32m   5394\u001b[0m     \u001b[0;32mdef\u001b[0m \u001b[0m_combine_const\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mother\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfunc\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   5395\u001b[0m         \u001b[0;32massert\u001b[0m \u001b[0mlib\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mis_scalar\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mother\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mor\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mndim\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mother\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;36m0\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 5396\u001b[0;31m         \u001b[0;32mreturn\u001b[0m \u001b[0mops\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdispatch_to_series\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mother\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfunc\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m   5397\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   5398\u001b[0m     \u001b[0;32mdef\u001b[0m \u001b[0mcombine\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mother\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfunc\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfill_value\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0moverwrite\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mTrue\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m~/.local/lib/python3.6/site-packages/pandas/core/ops/__init__.py\u001b[0m in \u001b[0;36mdispatch_to_series\u001b[0;34m(left, right, func, str_rep, axis)\u001b[0m\n\u001b[1;32m    594\u001b[0m         \u001b[0;32mraise\u001b[0m \u001b[0mNotImplementedError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mright\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    595\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 596\u001b[0;31m     \u001b[0mnew_data\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mexpressions\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mevaluate\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcolumn_op\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mstr_rep\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mleft\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mright\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    597\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    598\u001b[0m     \u001b[0mresult\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mleft\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_constructor\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mnew_data\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mindex\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mleft\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mindex\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcopy\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mFalse\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m~/.local/lib/python3.6/site-packages/pandas/core/computation/expressions.py\u001b[0m in \u001b[0;36mevaluate\u001b[0;34m(op, op_str, a, b, use_numexpr, **eval_kwargs)\u001b[0m\n\u001b[1;32m    218\u001b[0m     \u001b[0muse_numexpr\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0muse_numexpr\u001b[0m \u001b[0;32mand\u001b[0m \u001b[0m_bool_arith_check\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mop_str\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0ma\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mb\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    219\u001b[0m     \u001b[0;32mif\u001b[0m \u001b[0muse_numexpr\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 220\u001b[0;31m         \u001b[0;32mreturn\u001b[0m \u001b[0m_evaluate\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mop\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mop_str\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0ma\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mb\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0meval_kwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    221\u001b[0m     \u001b[0;32mreturn\u001b[0m \u001b[0m_evaluate_standard\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mop\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mop_str\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0ma\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mb\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    222\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m~/.local/lib/python3.6/site-packages/pandas/core/computation/expressions.py\u001b[0m in \u001b[0;36m_evaluate_standard\u001b[0;34m(op, op_str, a, b, **eval_kwargs)\u001b[0m\n\u001b[1;32m     68\u001b[0m         \u001b[0m_store_test_result\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;32mFalse\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     69\u001b[0m     \u001b[0;32mwith\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0merrstate\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mall\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m\"ignore\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 70\u001b[0;31m         \u001b[0;32mreturn\u001b[0m \u001b[0mop\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0ma\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mb\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m     71\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     72\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m~/.local/lib/python3.6/site-packages/pandas/core/ops/__init__.py\u001b[0m in \u001b[0;36mcolumn_op\u001b[0;34m(a, b)\u001b[0m\n\u001b[1;32m    568\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    569\u001b[0m         \u001b[0;32mdef\u001b[0m \u001b[0mcolumn_op\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0ma\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mb\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 570\u001b[0;31m             \u001b[0;32mreturn\u001b[0m \u001b[0;34m{\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mfunc\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0ma\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0miloc\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mi\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mb\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mi\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mrange\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0ma\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcolumns\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m}\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    571\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    572\u001b[0m     \u001b[0;32melif\u001b[0m \u001b[0misinstance\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mright\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mABCDataFrame\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m~/.local/lib/python3.6/site-packages/pandas/core/ops/__init__.py\u001b[0m in \u001b[0;36m<dictcomp>\u001b[0;34m(.0)\u001b[0m\n\u001b[1;32m    568\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    569\u001b[0m         \u001b[0;32mdef\u001b[0m \u001b[0mcolumn_op\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0ma\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mb\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 570\u001b[0;31m             \u001b[0;32mreturn\u001b[0m \u001b[0;34m{\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mfunc\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0ma\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0miloc\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mi\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mb\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mi\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mrange\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0ma\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcolumns\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m}\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    571\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    572\u001b[0m     \u001b[0;32melif\u001b[0m \u001b[0misinstance\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mright\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mABCDataFrame\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m~/.local/lib/python3.6/site-packages/pandas/core/ops/__init__.py\u001b[0m in \u001b[0;36mwrapper\u001b[0;34m(self, other, axis)\u001b[0m\n\u001b[1;32m   1227\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   1228\u001b[0m             \u001b[0;32mwith\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0merrstate\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mall\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m\"ignore\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1229\u001b[0;31m                 \u001b[0mres\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mna_op\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mvalues\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mother\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m   1230\u001b[0m             \u001b[0;32mif\u001b[0m \u001b[0mis_scalar\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mres\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   1231\u001b[0m                 raise TypeError(\n",
      "\u001b[0;32m~/.local/lib/python3.6/site-packages/pandas/core/ops/__init__.py\u001b[0m in \u001b[0;36mna_op\u001b[0;34m(x, y)\u001b[0m\n\u001b[1;32m   1089\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   1090\u001b[0m         \u001b[0;32mif\u001b[0m \u001b[0mis_object_dtype\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mx\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdtype\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1091\u001b[0;31m             \u001b[0mresult\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0m_comp_method_OBJECT_ARRAY\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mop\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mx\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m   1092\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   1093\u001b[0m         \u001b[0;32melif\u001b[0m \u001b[0mis_datetimelike_v_numeric\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mx\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m~/.local/lib/python3.6/site-packages/pandas/core/ops/__init__.py\u001b[0m in \u001b[0;36m_comp_method_OBJECT_ARRAY\u001b[0;34m(op, x, y)\u001b[0m\n\u001b[1;32m   1067\u001b[0m         \u001b[0mresult\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mlibops\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mvec_compare\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mx\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mop\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   1068\u001b[0m     \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1069\u001b[0;31m         \u001b[0mresult\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mlibops\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mscalar_compare\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mx\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mop\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m   1070\u001b[0m     \u001b[0;32mreturn\u001b[0m \u001b[0mresult\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   1071\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32mpandas/_libs/ops.pyx\u001b[0m in \u001b[0;36mpandas._libs.ops.scalar_compare\u001b[0;34m()\u001b[0m\n",
      "\u001b[0;31mTypeError\u001b[0m: '>' not supported between instances of 'str' and 'int'"
     ]
    }
   ],
   "source": [
    "df_subset[df_subset>100]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Ship Mode</th>\n",
       "      <th>State</th>\n",
       "      <th>Sales</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Second Class</td>\n",
       "      <td>Kentucky</td>\n",
       "      <td>261.9600</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Second Class</td>\n",
       "      <td>Kentucky</td>\n",
       "      <td>731.9400</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>Standard Class</td>\n",
       "      <td>Florida</td>\n",
       "      <td>957.5775</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>Standard Class</td>\n",
       "      <td>California</td>\n",
       "      <td>907.1520</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>Standard Class</td>\n",
       "      <td>California</td>\n",
       "      <td>114.9000</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "        Ship Mode       State     Sales\n",
       "0    Second Class    Kentucky  261.9600\n",
       "1    Second Class    Kentucky  731.9400\n",
       "3  Standard Class     Florida  957.5775\n",
       "7  Standard Class  California  907.1520\n",
       "9  Standard Class  California  114.9000"
      ]
     },
     "execution_count": 20,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_subset[df_subset['Sales']>100]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Ship Mode</th>\n",
       "      <th>State</th>\n",
       "      <th>Sales</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Second Class</td>\n",
       "      <td>Kentucky</td>\n",
       "      <td>261.9600</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Second Class</td>\n",
       "      <td>Kentucky</td>\n",
       "      <td>731.9400</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>Standard Class</td>\n",
       "      <td>Florida</td>\n",
       "      <td>957.5775</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "        Ship Mode     State     Sales\n",
       "0    Second Class  Kentucky  261.9600\n",
       "1    Second Class  Kentucky  731.9400\n",
       "3  Standard Class   Florida  957.5775"
      ]
     },
     "execution_count": 21,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_subset[(df_subset['State']!='California') & (df_subset['Sales']>100)]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Exercise 6: Setting and re-setting index"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "The DataFrame\n",
      "-------------------------\n",
      "   Age  Height  Weight\n",
      "A   22      66     140\n",
      "B   42      70     148\n",
      "C   30      62     125\n",
      "D   35      68     160\n",
      "E   25      62     152\n",
      "\n",
      "After resetting index\n",
      "-----------------------------------\n",
      "  index  Age  Height  Weight\n",
      "0     A   22      66     140\n",
      "1     B   42      70     148\n",
      "2     C   30      62     125\n",
      "3     D   35      68     160\n",
      "4     E   25      62     152\n",
      "\n",
      "After resetting index with 'drop' option TRUE\n",
      "---------------------------------------------\n",
      "   Age  Height  Weight\n",
      "0   22      66     140\n",
      "1   42      70     148\n",
      "2   30      62     125\n",
      "3   35      68     160\n",
      "4   25      62     152\n",
      "\n",
      "Adding a new column 'Profession'\n",
      "---------------------------------------------\n",
      "   Age  Height  Weight Profession\n",
      "A   22      66     140    Student\n",
      "B   42      70     148    Teacher\n",
      "C   30      62     125   Engineer\n",
      "D   35      68     160     Doctor\n",
      "E   25      62     152      Nurse\n",
      "\n",
      "Setting 'Profession' column as index\n",
      "---------------------------------------------\n",
      "            Age  Height  Weight\n",
      "Profession                     \n",
      "Student      22      66     140\n",
      "Teacher      42      70     148\n",
      "Engineer     30      62     125\n",
      "Doctor       35      68     160\n",
      "Nurse        25      62     152\n"
     ]
    }
   ],
   "source": [
    "matrix_data = np.matrix('22,66,140;42,70,148;30,62,125;35,68,160;25,62,152')\n",
    "row_labels = ['A','B','C','D','E']\n",
    "column_headings = ['Age', 'Height', 'Weight']\n",
    "\n",
    "df1 = pd.DataFrame(data=matrix_data, index=row_labels, columns=column_headings)\n",
    "print(\"\\nThe DataFrame\\n\",'-'*25, sep='')\n",
    "print(df1)\n",
    "print(\"\\nAfter resetting index\\n\",'-'*35, sep='')\n",
    "print(df1.reset_index())\n",
    "print(\"\\nAfter resetting index with 'drop' option TRUE\\n\",'-'*45, sep='')\n",
    "print(df1.reset_index(drop=True))\n",
    "print(\"\\nAdding a new column 'Profession'\\n\",'-'*45, sep='')\n",
    "df1['Profession'] = \"Student Teacher Engineer Doctor Nurse\".split()\n",
    "print(df1)\n",
    "print(\"\\nSetting 'Profession' column as index\\n\",'-'*45, sep='')\n",
    "print (df1.set_index('Profession'))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Exercise 7: GroupBy method"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Ship Mode</th>\n",
       "      <th>State</th>\n",
       "      <th>Sales</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Second Class</td>\n",
       "      <td>Kentucky</td>\n",
       "      <td>261.9600</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Second Class</td>\n",
       "      <td>Kentucky</td>\n",
       "      <td>731.9400</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Second Class</td>\n",
       "      <td>California</td>\n",
       "      <td>14.6200</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>Standard Class</td>\n",
       "      <td>Florida</td>\n",
       "      <td>957.5775</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>Standard Class</td>\n",
       "      <td>Florida</td>\n",
       "      <td>22.3680</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>Standard Class</td>\n",
       "      <td>California</td>\n",
       "      <td>48.8600</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>Standard Class</td>\n",
       "      <td>California</td>\n",
       "      <td>7.2800</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>Standard Class</td>\n",
       "      <td>California</td>\n",
       "      <td>907.1520</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>Standard Class</td>\n",
       "      <td>California</td>\n",
       "      <td>18.5040</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>Standard Class</td>\n",
       "      <td>California</td>\n",
       "      <td>114.9000</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "        Ship Mode       State     Sales\n",
       "0    Second Class    Kentucky  261.9600\n",
       "1    Second Class    Kentucky  731.9400\n",
       "2    Second Class  California   14.6200\n",
       "3  Standard Class     Florida  957.5775\n",
       "4  Standard Class     Florida   22.3680\n",
       "5  Standard Class  California   48.8600\n",
       "6  Standard Class  California    7.2800\n",
       "7  Standard Class  California  907.1520\n",
       "8  Standard Class  California   18.5040\n",
       "9  Standard Class  California  114.9000"
      ]
     },
     "execution_count": 23,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_subset = df.loc[[i for i in range (10)],['Ship Mode','State','Sales']]\n",
    "df_subset"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [],
   "source": [
    "byState = df_subset.groupby('State')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<pandas.core.groupby.generic.DataFrameGroupBy object at 0x7fe370f2ff60>"
      ]
     },
     "execution_count": 25,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "byState"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Grouping by 'State' column and listing mean sales\n",
      "--------------------------------------------------\n",
      "                 Sales\n",
      "State                 \n",
      "California  185.219333\n",
      "Florida     489.972750\n",
      "Kentucky    496.950000\n"
     ]
    }
   ],
   "source": [
    "print(\"\\nGrouping by 'State' column and listing mean sales\\n\",'-'*50, sep='')\n",
    "print(byState.mean())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Grouping by 'State' column and listing total sum of sales\n",
      "--------------------------------------------------\n",
      "                Sales\n",
      "State                \n",
      "California  1111.3160\n",
      "Florida      979.9455\n",
      "Kentucky     993.9000\n"
     ]
    }
   ],
   "source": [
    "print(\"\\nGrouping by 'State' column and listing total sum of sales\\n\",'-'*50, sep='')\n",
    "print(byState.sum())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "           Sales                                                              \n",
      "           count        mean         std   min     25%     50%    75%      max\n",
      "California   6.0  185.219333  355.889307  7.28  15.591  33.682  98.39  907.152\n"
     ]
    }
   ],
   "source": [
    "print(pd.DataFrame(df_subset.groupby('State').describe().loc['California']).transpose())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead tr th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe thead tr:last-of-type th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr>\n",
       "      <th></th>\n",
       "      <th colspan=\"8\" halign=\"left\">Sales</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th></th>\n",
       "      <th>count</th>\n",
       "      <th>mean</th>\n",
       "      <th>std</th>\n",
       "      <th>min</th>\n",
       "      <th>25%</th>\n",
       "      <th>50%</th>\n",
       "      <th>75%</th>\n",
       "      <th>max</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Ship Mode</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>Second Class</th>\n",
       "      <td>3.0</td>\n",
       "      <td>336.173333</td>\n",
       "      <td>364.373037</td>\n",
       "      <td>14.62</td>\n",
       "      <td>138.290</td>\n",
       "      <td>261.96</td>\n",
       "      <td>496.950</td>\n",
       "      <td>731.9400</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Standard Class</th>\n",
       "      <td>7.0</td>\n",
       "      <td>296.663071</td>\n",
       "      <td>435.947552</td>\n",
       "      <td>7.28</td>\n",
       "      <td>20.436</td>\n",
       "      <td>48.86</td>\n",
       "      <td>511.026</td>\n",
       "      <td>957.5775</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "               Sales                                                           \\\n",
       "               count        mean         std    min      25%     50%      75%   \n",
       "Ship Mode                                                                       \n",
       "Second Class     3.0  336.173333  364.373037  14.62  138.290  261.96  496.950   \n",
       "Standard Class   7.0  296.663071  435.947552   7.28   20.436   48.86  511.026   \n",
       "\n",
       "                          \n",
       "                     max  \n",
       "Ship Mode                 \n",
       "Second Class    731.9400  \n",
       "Standard Class  957.5775  "
      ]
     },
     "execution_count": 29,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_subset.groupby('Ship Mode').describe().loc[['Second Class','Standard Class']]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th>California</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th rowspan=\"8\" valign=\"top\">Sales</th>\n",
       "      <th>count</th>\n",
       "      <td>6.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>mean</th>\n",
       "      <td>185.219333</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>std</th>\n",
       "      <td>355.889307</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>min</th>\n",
       "      <td>7.280000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25%</th>\n",
       "      <td>15.591000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>50%</th>\n",
       "      <td>33.682000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>75%</th>\n",
       "      <td>98.390000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>max</th>\n",
       "      <td>907.152000</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "             California\n",
       "Sales count    6.000000\n",
       "      mean   185.219333\n",
       "      std    355.889307\n",
       "      min      7.280000\n",
       "      25%     15.591000\n",
       "      50%     33.682000\n",
       "      75%     98.390000\n",
       "      max    907.152000"
      ]
     },
     "execution_count": 30,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pd.DataFrame(byState.describe().loc['California'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "metadata": {},
   "outputs": [],
   "source": [
    "byStateCity=df.groupby(['State','City'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th>count</th>\n",
       "      <th>mean</th>\n",
       "      <th>std</th>\n",
       "      <th>min</th>\n",
       "      <th>25%</th>\n",
       "      <th>50%</th>\n",
       "      <th>75%</th>\n",
       "      <th>max</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>State</th>\n",
       "      <th>City</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th rowspan=\"8\" valign=\"top\">Alabama</th>\n",
       "      <th>Auburn</th>\n",
       "      <td>6.0</td>\n",
       "      <td>294.471667</td>\n",
       "      <td>361.914543</td>\n",
       "      <td>3.760</td>\n",
       "      <td>8.8050</td>\n",
       "      <td>182.0300</td>\n",
       "      <td>456.40750</td>\n",
       "      <td>900.080</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Decatur</th>\n",
       "      <td>13.0</td>\n",
       "      <td>259.601538</td>\n",
       "      <td>385.660903</td>\n",
       "      <td>14.940</td>\n",
       "      <td>23.9200</td>\n",
       "      <td>44.9500</td>\n",
       "      <td>239.92000</td>\n",
       "      <td>1215.920</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Florence</th>\n",
       "      <td>5.0</td>\n",
       "      <td>399.470000</td>\n",
       "      <td>796.488863</td>\n",
       "      <td>4.980</td>\n",
       "      <td>7.2700</td>\n",
       "      <td>12.4800</td>\n",
       "      <td>152.76000</td>\n",
       "      <td>1819.860</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Hoover</th>\n",
       "      <td>4.0</td>\n",
       "      <td>131.462500</td>\n",
       "      <td>230.646923</td>\n",
       "      <td>7.160</td>\n",
       "      <td>13.3925</td>\n",
       "      <td>20.7250</td>\n",
       "      <td>138.79500</td>\n",
       "      <td>477.240</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Huntsville</th>\n",
       "      <td>10.0</td>\n",
       "      <td>248.437000</td>\n",
       "      <td>419.576667</td>\n",
       "      <td>3.620</td>\n",
       "      <td>26.8700</td>\n",
       "      <td>81.9200</td>\n",
       "      <td>171.80750</td>\n",
       "      <td>1319.960</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Mobile</th>\n",
       "      <td>11.0</td>\n",
       "      <td>496.635455</td>\n",
       "      <td>914.087425</td>\n",
       "      <td>8.960</td>\n",
       "      <td>46.8600</td>\n",
       "      <td>70.9800</td>\n",
       "      <td>505.96500</td>\n",
       "      <td>3040.000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Montgomery</th>\n",
       "      <td>10.0</td>\n",
       "      <td>372.273000</td>\n",
       "      <td>475.397645</td>\n",
       "      <td>10.160</td>\n",
       "      <td>21.7075</td>\n",
       "      <td>187.2150</td>\n",
       "      <td>499.05500</td>\n",
       "      <td>1394.950</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Tuscaloosa</th>\n",
       "      <td>2.0</td>\n",
       "      <td>87.850000</td>\n",
       "      <td>76.523096</td>\n",
       "      <td>33.740</td>\n",
       "      <td>60.7950</td>\n",
       "      <td>87.8500</td>\n",
       "      <td>114.90500</td>\n",
       "      <td>141.960</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"13\" valign=\"top\">Arizona</th>\n",
       "      <th>Avondale</th>\n",
       "      <td>6.0</td>\n",
       "      <td>157.801333</td>\n",
       "      <td>288.247527</td>\n",
       "      <td>14.576</td>\n",
       "      <td>18.1480</td>\n",
       "      <td>35.5960</td>\n",
       "      <td>88.67800</td>\n",
       "      <td>742.336</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Bullhead City</th>\n",
       "      <td>2.0</td>\n",
       "      <td>11.144000</td>\n",
       "      <td>4.559425</td>\n",
       "      <td>7.920</td>\n",
       "      <td>9.5320</td>\n",
       "      <td>11.1440</td>\n",
       "      <td>12.75600</td>\n",
       "      <td>14.368</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Chandler</th>\n",
       "      <td>7.0</td>\n",
       "      <td>153.821000</td>\n",
       "      <td>305.283748</td>\n",
       "      <td>8.544</td>\n",
       "      <td>9.1200</td>\n",
       "      <td>49.7920</td>\n",
       "      <td>78.89750</td>\n",
       "      <td>842.376</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Gilbert</th>\n",
       "      <td>15.0</td>\n",
       "      <td>278.158800</td>\n",
       "      <td>346.945589</td>\n",
       "      <td>5.904</td>\n",
       "      <td>36.1240</td>\n",
       "      <td>82.3680</td>\n",
       "      <td>375.80700</td>\n",
       "      <td>1113.024</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Glendale</th>\n",
       "      <td>23.0</td>\n",
       "      <td>126.863696</td>\n",
       "      <td>225.003236</td>\n",
       "      <td>2.368</td>\n",
       "      <td>14.8760</td>\n",
       "      <td>42.9760</td>\n",
       "      <td>109.13200</td>\n",
       "      <td>933.536</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Mesa</th>\n",
       "      <td>28.0</td>\n",
       "      <td>144.205000</td>\n",
       "      <td>155.275947</td>\n",
       "      <td>4.368</td>\n",
       "      <td>31.7640</td>\n",
       "      <td>81.6515</td>\n",
       "      <td>202.90250</td>\n",
       "      <td>552.000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Peoria</th>\n",
       "      <td>16.0</td>\n",
       "      <td>83.834500</td>\n",
       "      <td>88.768365</td>\n",
       "      <td>4.536</td>\n",
       "      <td>14.8920</td>\n",
       "      <td>68.1540</td>\n",
       "      <td>92.42600</td>\n",
       "      <td>280.792</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Phoenix</th>\n",
       "      <td>63.0</td>\n",
       "      <td>174.607254</td>\n",
       "      <td>322.324198</td>\n",
       "      <td>1.408</td>\n",
       "      <td>12.8145</td>\n",
       "      <td>46.8720</td>\n",
       "      <td>193.96400</td>\n",
       "      <td>1879.960</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Scottsdale</th>\n",
       "      <td>12.0</td>\n",
       "      <td>122.192250</td>\n",
       "      <td>103.500825</td>\n",
       "      <td>4.401</td>\n",
       "      <td>30.1230</td>\n",
       "      <td>110.3040</td>\n",
       "      <td>186.89850</td>\n",
       "      <td>307.776</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Sierra Vista</th>\n",
       "      <td>3.0</td>\n",
       "      <td>25.357333</td>\n",
       "      <td>9.543341</td>\n",
       "      <td>14.368</td>\n",
       "      <td>22.2560</td>\n",
       "      <td>30.1440</td>\n",
       "      <td>30.85200</td>\n",
       "      <td>31.560</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Tempe</th>\n",
       "      <td>13.0</td>\n",
       "      <td>82.330923</td>\n",
       "      <td>119.755669</td>\n",
       "      <td>3.366</td>\n",
       "      <td>8.3760</td>\n",
       "      <td>12.7680</td>\n",
       "      <td>79.40000</td>\n",
       "      <td>318.400</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Tucson</th>\n",
       "      <td>32.0</td>\n",
       "      <td>197.281750</td>\n",
       "      <td>242.004135</td>\n",
       "      <td>4.272</td>\n",
       "      <td>31.3200</td>\n",
       "      <td>95.9890</td>\n",
       "      <td>243.54000</td>\n",
       "      <td>1023.936</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Yuma</th>\n",
       "      <td>4.0</td>\n",
       "      <td>210.216250</td>\n",
       "      <td>270.654379</td>\n",
       "      <td>10.496</td>\n",
       "      <td>36.2660</td>\n",
       "      <td>115.1920</td>\n",
       "      <td>289.14225</td>\n",
       "      <td>599.985</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"9\" valign=\"top\">Arkansas</th>\n",
       "      <th>Conway</th>\n",
       "      <td>1.0</td>\n",
       "      <td>301.960000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>301.960</td>\n",
       "      <td>301.9600</td>\n",
       "      <td>301.9600</td>\n",
       "      <td>301.96000</td>\n",
       "      <td>301.960</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Fayetteville</th>\n",
       "      <td>14.0</td>\n",
       "      <td>267.343571</td>\n",
       "      <td>482.533092</td>\n",
       "      <td>6.240</td>\n",
       "      <td>19.5525</td>\n",
       "      <td>75.0850</td>\n",
       "      <td>297.83750</td>\n",
       "      <td>1793.980</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Hot Springs</th>\n",
       "      <td>4.0</td>\n",
       "      <td>61.457500</td>\n",
       "      <td>65.669769</td>\n",
       "      <td>25.920</td>\n",
       "      <td>28.9725</td>\n",
       "      <td>29.9950</td>\n",
       "      <td>62.48000</td>\n",
       "      <td>159.920</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Jonesboro</th>\n",
       "      <td>11.0</td>\n",
       "      <td>265.029091</td>\n",
       "      <td>366.221274</td>\n",
       "      <td>6.630</td>\n",
       "      <td>23.1500</td>\n",
       "      <td>59.9800</td>\n",
       "      <td>439.16000</td>\n",
       "      <td>1067.940</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Little Rock</th>\n",
       "      <td>24.0</td>\n",
       "      <td>148.347917</td>\n",
       "      <td>206.445952</td>\n",
       "      <td>11.160</td>\n",
       "      <td>19.0700</td>\n",
       "      <td>60.9900</td>\n",
       "      <td>180.69000</td>\n",
       "      <td>881.930</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Pine Bluff</th>\n",
       "      <td>2.0</td>\n",
       "      <td>106.455000</td>\n",
       "      <td>132.221897</td>\n",
       "      <td>12.960</td>\n",
       "      <td>59.7075</td>\n",
       "      <td>106.4550</td>\n",
       "      <td>153.20250</td>\n",
       "      <td>199.950</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Rogers</th>\n",
       "      <td>1.0</td>\n",
       "      <td>40.410000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>40.410</td>\n",
       "      <td>40.4100</td>\n",
       "      <td>40.4100</td>\n",
       "      <td>40.41000</td>\n",
       "      <td>40.410</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Springdale</th>\n",
       "      <td>1.0</td>\n",
       "      <td>4.300000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>4.300</td>\n",
       "      <td>4.3000</td>\n",
       "      <td>4.3000</td>\n",
       "      <td>4.30000</td>\n",
       "      <td>4.300</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Texarkana</th>\n",
       "      <td>2.0</td>\n",
       "      <td>327.120000</td>\n",
       "      <td>393.462497</td>\n",
       "      <td>48.900</td>\n",
       "      <td>188.0100</td>\n",
       "      <td>327.1200</td>\n",
       "      <td>466.23000</td>\n",
       "      <td>605.340</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"15\" valign=\"top\">Washington</th>\n",
       "      <th>Bellingham</th>\n",
       "      <td>3.0</td>\n",
       "      <td>1263.413333</td>\n",
       "      <td>1327.859461</td>\n",
       "      <td>25.120</td>\n",
       "      <td>562.3100</td>\n",
       "      <td>1099.5000</td>\n",
       "      <td>1882.56000</td>\n",
       "      <td>2665.620</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Covington</th>\n",
       "      <td>4.0</td>\n",
       "      <td>103.420000</td>\n",
       "      <td>93.849114</td>\n",
       "      <td>29.900</td>\n",
       "      <td>42.6500</td>\n",
       "      <td>73.4500</td>\n",
       "      <td>134.22000</td>\n",
       "      <td>236.880</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Des Moines</th>\n",
       "      <td>7.0</td>\n",
       "      <td>493.491143</td>\n",
       "      <td>665.676105</td>\n",
       "      <td>18.540</td>\n",
       "      <td>86.7940</td>\n",
       "      <td>215.9760</td>\n",
       "      <td>623.18200</td>\n",
       "      <td>1799.970</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Edmonds</th>\n",
       "      <td>14.0</td>\n",
       "      <td>180.263714</td>\n",
       "      <td>344.045236</td>\n",
       "      <td>7.380</td>\n",
       "      <td>21.1450</td>\n",
       "      <td>54.0420</td>\n",
       "      <td>155.47250</td>\n",
       "      <td>1298.550</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Everett</th>\n",
       "      <td>1.0</td>\n",
       "      <td>3.856000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>3.856</td>\n",
       "      <td>3.8560</td>\n",
       "      <td>3.8560</td>\n",
       "      <td>3.85600</td>\n",
       "      <td>3.856</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Kent</th>\n",
       "      <td>8.0</td>\n",
       "      <td>168.906750</td>\n",
       "      <td>257.793525</td>\n",
       "      <td>19.440</td>\n",
       "      <td>38.4920</td>\n",
       "      <td>60.9880</td>\n",
       "      <td>168.21250</td>\n",
       "      <td>786.480</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Longview</th>\n",
       "      <td>3.0</td>\n",
       "      <td>39.736667</td>\n",
       "      <td>31.681749</td>\n",
       "      <td>18.240</td>\n",
       "      <td>21.5450</td>\n",
       "      <td>24.8500</td>\n",
       "      <td>50.48500</td>\n",
       "      <td>76.120</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Marysville</th>\n",
       "      <td>2.0</td>\n",
       "      <td>51.090000</td>\n",
       "      <td>59.778807</td>\n",
       "      <td>8.820</td>\n",
       "      <td>29.9550</td>\n",
       "      <td>51.0900</td>\n",
       "      <td>72.22500</td>\n",
       "      <td>93.360</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Olympia</th>\n",
       "      <td>5.0</td>\n",
       "      <td>204.089600</td>\n",
       "      <td>236.364267</td>\n",
       "      <td>14.030</td>\n",
       "      <td>45.6800</td>\n",
       "      <td>155.2500</td>\n",
       "      <td>201.56800</td>\n",
       "      <td>603.920</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Pasco</th>\n",
       "      <td>6.0</td>\n",
       "      <td>366.852000</td>\n",
       "      <td>356.325643</td>\n",
       "      <td>5.880</td>\n",
       "      <td>97.5540</td>\n",
       "      <td>352.3200</td>\n",
       "      <td>464.61000</td>\n",
       "      <td>975.920</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Redmond</th>\n",
       "      <td>3.0</td>\n",
       "      <td>18.410000</td>\n",
       "      <td>5.473783</td>\n",
       "      <td>12.320</td>\n",
       "      <td>16.1550</td>\n",
       "      <td>19.9900</td>\n",
       "      <td>21.45500</td>\n",
       "      <td>22.920</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Renton</th>\n",
       "      <td>3.0</td>\n",
       "      <td>414.210667</td>\n",
       "      <td>544.196636</td>\n",
       "      <td>51.840</td>\n",
       "      <td>101.3200</td>\n",
       "      <td>150.8000</td>\n",
       "      <td>595.39600</td>\n",
       "      <td>1039.992</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Seattle</th>\n",
       "      <td>428.0</td>\n",
       "      <td>279.300799</td>\n",
       "      <td>823.990115</td>\n",
       "      <td>1.344</td>\n",
       "      <td>20.6905</td>\n",
       "      <td>65.6400</td>\n",
       "      <td>201.21000</td>\n",
       "      <td>13999.960</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Spokane</th>\n",
       "      <td>7.0</td>\n",
       "      <td>289.702571</td>\n",
       "      <td>300.735758</td>\n",
       "      <td>23.840</td>\n",
       "      <td>103.6200</td>\n",
       "      <td>149.7300</td>\n",
       "      <td>404.75400</td>\n",
       "      <td>837.600</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Vancouver</th>\n",
       "      <td>5.0</td>\n",
       "      <td>137.367200</td>\n",
       "      <td>157.470820</td>\n",
       "      <td>9.640</td>\n",
       "      <td>14.8000</td>\n",
       "      <td>44.0200</td>\n",
       "      <td>302.37600</td>\n",
       "      <td>316.000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>West Virginia</th>\n",
       "      <th>Wheeling</th>\n",
       "      <td>4.0</td>\n",
       "      <td>302.456000</td>\n",
       "      <td>313.508627</td>\n",
       "      <td>6.240</td>\n",
       "      <td>63.3600</td>\n",
       "      <td>265.1200</td>\n",
       "      <td>504.21600</td>\n",
       "      <td>673.344</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"13\" valign=\"top\">Wisconsin</th>\n",
       "      <th>Appleton</th>\n",
       "      <td>2.0</td>\n",
       "      <td>835.655000</td>\n",
       "      <td>1151.304190</td>\n",
       "      <td>21.560</td>\n",
       "      <td>428.6075</td>\n",
       "      <td>835.6550</td>\n",
       "      <td>1242.70250</td>\n",
       "      <td>1649.750</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Eau Claire</th>\n",
       "      <td>6.0</td>\n",
       "      <td>274.275000</td>\n",
       "      <td>237.093940</td>\n",
       "      <td>32.560</td>\n",
       "      <td>117.6700</td>\n",
       "      <td>217.7700</td>\n",
       "      <td>364.37000</td>\n",
       "      <td>680.010</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Franklin</th>\n",
       "      <td>9.0</td>\n",
       "      <td>530.472222</td>\n",
       "      <td>715.606149</td>\n",
       "      <td>3.600</td>\n",
       "      <td>106.0500</td>\n",
       "      <td>171.5500</td>\n",
       "      <td>392.94000</td>\n",
       "      <td>1951.840</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Green Bay</th>\n",
       "      <td>4.0</td>\n",
       "      <td>131.830000</td>\n",
       "      <td>224.726888</td>\n",
       "      <td>16.740</td>\n",
       "      <td>18.4050</td>\n",
       "      <td>20.8400</td>\n",
       "      <td>134.26500</td>\n",
       "      <td>468.900</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Kenosha</th>\n",
       "      <td>9.0</td>\n",
       "      <td>434.081111</td>\n",
       "      <td>301.753460</td>\n",
       "      <td>14.980</td>\n",
       "      <td>139.9500</td>\n",
       "      <td>399.9500</td>\n",
       "      <td>699.98000</td>\n",
       "      <td>860.930</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>La Crosse</th>\n",
       "      <td>5.0</td>\n",
       "      <td>166.082000</td>\n",
       "      <td>200.481230</td>\n",
       "      <td>3.040</td>\n",
       "      <td>56.8200</td>\n",
       "      <td>68.6400</td>\n",
       "      <td>201.96000</td>\n",
       "      <td>499.950</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Madison</th>\n",
       "      <td>10.0</td>\n",
       "      <td>534.679000</td>\n",
       "      <td>875.980874</td>\n",
       "      <td>1.810</td>\n",
       "      <td>32.3000</td>\n",
       "      <td>119.5850</td>\n",
       "      <td>614.39000</td>\n",
       "      <td>2807.840</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Milwaukee</th>\n",
       "      <td>45.0</td>\n",
       "      <td>253.560444</td>\n",
       "      <td>376.458437</td>\n",
       "      <td>5.820</td>\n",
       "      <td>29.3400</td>\n",
       "      <td>92.5200</td>\n",
       "      <td>272.40000</td>\n",
       "      <td>1526.560</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Sheboygan</th>\n",
       "      <td>4.0</td>\n",
       "      <td>19.935000</td>\n",
       "      <td>15.126715</td>\n",
       "      <td>1.980</td>\n",
       "      <td>11.7225</td>\n",
       "      <td>20.0850</td>\n",
       "      <td>28.29750</td>\n",
       "      <td>37.590</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Superior</th>\n",
       "      <td>9.0</td>\n",
       "      <td>144.414444</td>\n",
       "      <td>213.394065</td>\n",
       "      <td>5.560</td>\n",
       "      <td>17.1200</td>\n",
       "      <td>47.4000</td>\n",
       "      <td>125.99000</td>\n",
       "      <td>629.100</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Waukesha</th>\n",
       "      <td>1.0</td>\n",
       "      <td>54.500000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>54.500</td>\n",
       "      <td>54.5000</td>\n",
       "      <td>54.5000</td>\n",
       "      <td>54.50000</td>\n",
       "      <td>54.500</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Wausau</th>\n",
       "      <td>4.0</td>\n",
       "      <td>79.370000</td>\n",
       "      <td>111.450605</td>\n",
       "      <td>12.390</td>\n",
       "      <td>20.0325</td>\n",
       "      <td>29.6050</td>\n",
       "      <td>88.94250</td>\n",
       "      <td>245.880</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>West Allis</th>\n",
       "      <td>2.0</td>\n",
       "      <td>125.240000</td>\n",
       "      <td>165.067007</td>\n",
       "      <td>8.520</td>\n",
       "      <td>66.8800</td>\n",
       "      <td>125.2400</td>\n",
       "      <td>183.60000</td>\n",
       "      <td>241.960</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Wyoming</th>\n",
       "      <th>Cheyenne</th>\n",
       "      <td>1.0</td>\n",
       "      <td>1603.136000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1603.136</td>\n",
       "      <td>1603.1360</td>\n",
       "      <td>1603.1360</td>\n",
       "      <td>1603.13600</td>\n",
       "      <td>1603.136</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>604 rows × 8 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                      count         mean         std       min        25%  \\\n",
       "State     City                                                              \n",
       "Alabama   Auburn        6.0   294.471667  361.914543     3.760     8.8050   \n",
       "          Decatur      13.0   259.601538  385.660903    14.940    23.9200   \n",
       "          Florence      5.0   399.470000  796.488863     4.980     7.2700   \n",
       "          Hoover        4.0   131.462500  230.646923     7.160    13.3925   \n",
       "          Huntsville   10.0   248.437000  419.576667     3.620    26.8700   \n",
       "...                     ...          ...         ...       ...        ...   \n",
       "Wisconsin Superior      9.0   144.414444  213.394065     5.560    17.1200   \n",
       "          Waukesha      1.0    54.500000         NaN    54.500    54.5000   \n",
       "          Wausau        4.0    79.370000  111.450605    12.390    20.0325   \n",
       "          West Allis    2.0   125.240000  165.067007     8.520    66.8800   \n",
       "Wyoming   Cheyenne      1.0  1603.136000         NaN  1603.136  1603.1360   \n",
       "\n",
       "                           50%        75%       max  \n",
       "State     City                                       \n",
       "Alabama   Auburn       182.030   456.4075   900.080  \n",
       "          Decatur       44.950   239.9200  1215.920  \n",
       "          Florence      12.480   152.7600  1819.860  \n",
       "          Hoover        20.725   138.7950   477.240  \n",
       "          Huntsville    81.920   171.8075  1319.960  \n",
       "...                        ...        ...       ...  \n",
       "Wisconsin Superior      47.400   125.9900   629.100  \n",
       "          Waukesha      54.500    54.5000    54.500  \n",
       "          Wausau        29.605    88.9425   245.880  \n",
       "          West Allis   125.240   183.6000   241.960  \n",
       "Wyoming   Cheyenne    1603.136  1603.1360  1603.136  \n",
       "\n",
       "[604 rows x 8 columns]"
      ]
     },
     "execution_count": 32,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Pick the Sales column before calling describe() so the summary statistics\n",
    "# are computed for that one column only, instead of describing every column\n",
    "# of the grouped frame and discarding all but Sales afterwards.\n",
    "byStateCity['Sales'].describe()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Exercise 8: Missing values in Pandas"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Read the worksheet called \"Missing\" from the Superstore workbook.\n",
    "df_missing = pd.read_excel(\n",
    "    \"Sample - Superstore.xls\",\n",
    "    sheet_name=\"Missing\",\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Customer</th>\n",
       "      <th>Product</th>\n",
       "      <th>Sales</th>\n",
       "      <th>Quantity</th>\n",
       "      <th>Discount</th>\n",
       "      <th>Profit</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Brosina Hoffman</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1706.184</td>\n",
       "      <td>9.0</td>\n",
       "      <td>0.2</td>\n",
       "      <td>85.3092</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Brosina Hoffman</td>\n",
       "      <td>Phones</td>\n",
       "      <td>911.424</td>\n",
       "      <td>4.0</td>\n",
       "      <td>0.2</td>\n",
       "      <td>68.3568</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Zuschuss Donatelli</td>\n",
       "      <td>Art</td>\n",
       "      <td>8.560</td>\n",
       "      <td>2.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>2.4824</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>Zuschuss Donatelli</td>\n",
       "      <td>Phones</td>\n",
       "      <td>NaN</td>\n",
       "      <td>3.0</td>\n",
       "      <td>0.2</td>\n",
       "      <td>16.0110</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>Zuschuss Donatelli</td>\n",
       "      <td>Binders</td>\n",
       "      <td>22.720</td>\n",
       "      <td>4.0</td>\n",
       "      <td>0.2</td>\n",
       "      <td>7.3840</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>Eric Hoffmann</td>\n",
       "      <td>Binders</td>\n",
       "      <td>11.648</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.2</td>\n",
       "      <td>4.2224</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>Eric Hoffmann</td>\n",
       "      <td>Accessories</td>\n",
       "      <td>90.570</td>\n",
       "      <td>3.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>11.7741</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>Ruben Ausman</td>\n",
       "      <td>NaN</td>\n",
       "      <td>77.880</td>\n",
       "      <td>2.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>NaN</td>\n",
       "      <td>Accessories</td>\n",
       "      <td>13.980</td>\n",
       "      <td>2.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>6.1512</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>Kunst Miller</td>\n",
       "      <td>Binders</td>\n",
       "      <td>25.824</td>\n",
       "      <td>6.0</td>\n",
       "      <td>0.2</td>\n",
       "      <td>9.3612</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>Kunst Miller</td>\n",
       "      <td>Paper</td>\n",
       "      <td>146.730</td>\n",
       "      <td>3.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>68.9631</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "              Customer      Product     Sales  Quantity  Discount   Profit\n",
       "0      Brosina Hoffman          NaN  1706.184       9.0       0.2  85.3092\n",
       "1      Brosina Hoffman       Phones   911.424       4.0       0.2  68.3568\n",
       "2   Zuschuss Donatelli          Art     8.560       2.0       0.0   2.4824\n",
       "3   Zuschuss Donatelli       Phones       NaN       3.0       0.2  16.0110\n",
       "4   Zuschuss Donatelli      Binders    22.720       4.0       0.2   7.3840\n",
       "5        Eric Hoffmann      Binders    11.648       NaN       0.2   4.2224\n",
       "6        Eric Hoffmann  Accessories    90.570       3.0       0.0  11.7741\n",
       "7         Ruben Ausman          NaN    77.880       2.0       0.0      NaN\n",
       "8                  NaN  Accessories    13.980       2.0       0.0   6.1512\n",
       "9         Kunst Miller      Binders    25.824       6.0       0.2   9.3612\n",
       "10        Kunst Miller        Paper   146.730       3.0       0.0  68.9631"
      ]
     },
     "execution_count": 34,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_missing"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Customer</th>\n",
       "      <th>Product</th>\n",
       "      <th>Sales</th>\n",
       "      <th>Quantity</th>\n",
       "      <th>Discount</th>\n",
       "      <th>Profit</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    Customer  Product  Sales  Quantity  Discount  Profit\n",
       "0      False     True  False     False     False   False\n",
       "1      False    False  False     False     False   False\n",
       "2      False    False  False     False     False   False\n",
       "3      False    False   True     False     False   False\n",
       "4      False    False  False     False     False   False\n",
       "5      False    False  False      True     False   False\n",
       "6      False    False  False     False     False   False\n",
       "7      False     True  False     False     False    True\n",
       "8       True    False  False     False     False   False\n",
       "9      False    False  False     False     False   False\n",
       "10     False    False  False     False     False   False"
      ]
     },
     "execution_count": 35,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Boolean mask with the same shape as df_missing: True wherever a cell is NaN.\n",
    "df_missing.isna()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Customer has 1 missing value(s)\n",
      "Product has 2 missing value(s)\n",
      "Sales has 1 missing value(s)\n",
      "Quantity has 1 missing value(s)\n",
      "Discount has NO missing value!\n",
      "Profit has 1 missing value(s)\n"
     ]
    }
   ],
   "source": [
    "# Tally the NaNs of every column in a single pass, then report column by column.\n",
    "missing_per_column = df_missing.isnull().sum()\n",
    "for column_name, n_missing in missing_per_column.items():\n",
    "    if n_missing > 0:\n",
    "        print(\"{} has {} missing value(s)\".format(column_name, n_missing))\n",
    "    else:\n",
    "        print(\"{} has NO missing value!\".format(column_name))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Exercise 9: Filling missing values with `fillna()`"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Customer</th>\n",
       "      <th>Product</th>\n",
       "      <th>Sales</th>\n",
       "      <th>Quantity</th>\n",
       "      <th>Discount</th>\n",
       "      <th>Profit</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Brosina Hoffman</td>\n",
       "      <td>FILL</td>\n",
       "      <td>1706.18</td>\n",
       "      <td>9</td>\n",
       "      <td>0.2</td>\n",
       "      <td>85.3092</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Brosina Hoffman</td>\n",
       "      <td>Phones</td>\n",
       "      <td>911.424</td>\n",
       "      <td>4</td>\n",
       "      <td>0.2</td>\n",
       "      <td>68.3568</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Zuschuss Donatelli</td>\n",
       "      <td>Art</td>\n",
       "      <td>8.56</td>\n",
       "      <td>2</td>\n",
       "      <td>0.0</td>\n",
       "      <td>2.4824</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>Zuschuss Donatelli</td>\n",
       "      <td>Phones</td>\n",
       "      <td>FILL</td>\n",
       "      <td>3</td>\n",
       "      <td>0.2</td>\n",
       "      <td>16.011</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>Zuschuss Donatelli</td>\n",
       "      <td>Binders</td>\n",
       "      <td>22.72</td>\n",
       "      <td>4</td>\n",
       "      <td>0.2</td>\n",
       "      <td>7.384</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>Eric Hoffmann</td>\n",
       "      <td>Binders</td>\n",
       "      <td>11.648</td>\n",
       "      <td>FILL</td>\n",
       "      <td>0.2</td>\n",
       "      <td>4.2224</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>Eric Hoffmann</td>\n",
       "      <td>Accessories</td>\n",
       "      <td>90.57</td>\n",
       "      <td>3</td>\n",
       "      <td>0.0</td>\n",
       "      <td>11.7741</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>Ruben Ausman</td>\n",
       "      <td>FILL</td>\n",
       "      <td>77.88</td>\n",
       "      <td>2</td>\n",
       "      <td>0.0</td>\n",
       "      <td>FILL</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>FILL</td>\n",
       "      <td>Accessories</td>\n",
       "      <td>13.98</td>\n",
       "      <td>2</td>\n",
       "      <td>0.0</td>\n",
       "      <td>6.1512</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>Kunst Miller</td>\n",
       "      <td>Binders</td>\n",
       "      <td>25.824</td>\n",
       "      <td>6</td>\n",
       "      <td>0.2</td>\n",
       "      <td>9.3612</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>Kunst Miller</td>\n",
       "      <td>Paper</td>\n",
       "      <td>146.73</td>\n",
       "      <td>3</td>\n",
       "      <td>0.0</td>\n",
       "      <td>68.9631</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "              Customer      Product    Sales Quantity  Discount   Profit\n",
       "0      Brosina Hoffman         FILL  1706.18        9       0.2  85.3092\n",
       "1      Brosina Hoffman       Phones  911.424        4       0.2  68.3568\n",
       "2   Zuschuss Donatelli          Art     8.56        2       0.0   2.4824\n",
       "3   Zuschuss Donatelli       Phones     FILL        3       0.2   16.011\n",
       "4   Zuschuss Donatelli      Binders    22.72        4       0.2    7.384\n",
       "5        Eric Hoffmann      Binders   11.648     FILL       0.2   4.2224\n",
       "6        Eric Hoffmann  Accessories    90.57        3       0.0  11.7741\n",
       "7         Ruben Ausman         FILL    77.88        2       0.0     FILL\n",
       "8                 FILL  Accessories    13.98        2       0.0   6.1512\n",
       "9         Kunst Miller      Binders   25.824        6       0.2   9.3612\n",
       "10        Kunst Miller        Paper   146.73        3       0.0  68.9631"
      ]
     },
     "execution_count": 37,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_missing.fillna('FILL')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 38,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Customer</th>\n",
       "      <th>Product</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Brosina Hoffman</td>\n",
       "      <td>FILL</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Brosina Hoffman</td>\n",
       "      <td>Phones</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Zuschuss Donatelli</td>\n",
       "      <td>Art</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>Zuschuss Donatelli</td>\n",
       "      <td>Phones</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>Zuschuss Donatelli</td>\n",
       "      <td>Binders</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>Eric Hoffmann</td>\n",
       "      <td>Binders</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>Eric Hoffmann</td>\n",
       "      <td>Accessories</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>Ruben Ausman</td>\n",
       "      <td>FILL</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>FILL</td>\n",
       "      <td>Accessories</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>Kunst Miller</td>\n",
       "      <td>Binders</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>Kunst Miller</td>\n",
       "      <td>Paper</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "              Customer      Product\n",
       "0      Brosina Hoffman         FILL\n",
       "1      Brosina Hoffman       Phones\n",
       "2   Zuschuss Donatelli          Art\n",
       "3   Zuschuss Donatelli       Phones\n",
       "4   Zuschuss Donatelli      Binders\n",
       "5        Eric Hoffmann      Binders\n",
       "6        Eric Hoffmann  Accessories\n",
       "7         Ruben Ausman         FILL\n",
       "8                 FILL  Accessories\n",
       "9         Kunst Miller      Binders\n",
       "10        Kunst Miller        Paper"
      ]
     },
     "execution_count": 38,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Fill missing entries of the Customer and Product columns with the placeholder 'FILL'\n",
    "df_missing.loc[:, ['Customer','Product']].fillna(value='FILL')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 39,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0     1706.184\n",
       "1      911.424\n",
       "2        8.560\n",
       "3        8.560\n",
       "4       22.720\n",
       "5       11.648\n",
       "6       90.570\n",
       "7       77.880\n",
       "8       13.980\n",
       "9       25.824\n",
       "10     146.730\n",
       "Name: Sales, dtype: float64"
      ]
     },
     "execution_count": 39,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Forward fill: a missing Sales value is replaced by the last valid value above it.\n",
    "# Series.ffill() is used instead of fillna(method='ffill'), which newer pandas deprecates.\n",
    "df_missing['Sales'].ffill()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 40,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0     1706.184\n",
       "1      911.424\n",
       "2        8.560\n",
       "3       22.720\n",
       "4       22.720\n",
       "5       11.648\n",
       "6       90.570\n",
       "7       77.880\n",
       "8       13.980\n",
       "9       25.824\n",
       "10     146.730\n",
       "Name: Sales, dtype: float64"
      ]
     },
     "execution_count": 40,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Backward fill: a missing Sales value is replaced by the next valid value below it.\n",
    "# Series.bfill() is used instead of fillna(method='bfill'), which newer pandas deprecates.\n",
    "df_missing['Sales'].bfill()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 41,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0     1706.184\n",
       "1      911.424\n",
       "2        8.560\n",
       "3      301.552\n",
       "4       22.720\n",
       "5       11.648\n",
       "6       90.570\n",
       "7       77.880\n",
       "8       13.980\n",
       "9       25.824\n",
       "10     146.730\n",
       "Name: Sales, dtype: float64"
      ]
     },
     "execution_count": 41,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Impute missing Sales values with the mean of the Sales column.\n",
    "# The mean is taken on the Sales Series only: DataFrame.mean() would also try to reduce\n",
    "# the string columns (Customer, Product), which raises a TypeError on pandas >= 2.0.\n",
    "df_missing['Sales'].fillna(df_missing['Sales'].mean())"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Exercise 10: Dropping missing values with `dropna()`"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 42,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Customer</th>\n",
       "      <th>Product</th>\n",
       "      <th>Sales</th>\n",
       "      <th>Quantity</th>\n",
       "      <th>Discount</th>\n",
       "      <th>Profit</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Brosina Hoffman</td>\n",
       "      <td>Phones</td>\n",
       "      <td>911.424</td>\n",
       "      <td>4.0</td>\n",
       "      <td>0.2</td>\n",
       "      <td>68.3568</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Zuschuss Donatelli</td>\n",
       "      <td>Art</td>\n",
       "      <td>8.560</td>\n",
       "      <td>2.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>2.4824</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>Zuschuss Donatelli</td>\n",
       "      <td>Binders</td>\n",
       "      <td>22.720</td>\n",
       "      <td>4.0</td>\n",
       "      <td>0.2</td>\n",
       "      <td>7.3840</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>Eric Hoffmann</td>\n",
       "      <td>Accessories</td>\n",
       "      <td>90.570</td>\n",
       "      <td>3.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>11.7741</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>Kunst Miller</td>\n",
       "      <td>Binders</td>\n",
       "      <td>25.824</td>\n",
       "      <td>6.0</td>\n",
       "      <td>0.2</td>\n",
       "      <td>9.3612</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>Kunst Miller</td>\n",
       "      <td>Paper</td>\n",
       "      <td>146.730</td>\n",
       "      <td>3.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>68.9631</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "              Customer      Product    Sales  Quantity  Discount   Profit\n",
       "1      Brosina Hoffman       Phones  911.424       4.0       0.2  68.3568\n",
       "2   Zuschuss Donatelli          Art    8.560       2.0       0.0   2.4824\n",
       "4   Zuschuss Donatelli      Binders   22.720       4.0       0.2   7.3840\n",
       "6        Eric Hoffmann  Accessories   90.570       3.0       0.0  11.7741\n",
       "9         Kunst Miller      Binders   25.824       6.0       0.2   9.3612\n",
       "10        Kunst Miller        Paper  146.730       3.0       0.0  68.9631"
      ]
     },
     "execution_count": 42,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Drop every row that contains at least one missing value\n",
    "df_missing.dropna(axis='index')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 43,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Discount</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0.2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>0.2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>0.2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>0.2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>0.2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>0.2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    Discount\n",
       "0        0.2\n",
       "1        0.2\n",
       "2        0.0\n",
       "3        0.2\n",
       "4        0.2\n",
       "5        0.2\n",
       "6        0.0\n",
       "7        0.0\n",
       "8        0.0\n",
       "9        0.2\n",
       "10       0.0"
      ]
     },
     "execution_count": 43,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Drop every column that contains at least one missing value\n",
    "df_missing.dropna(axis='columns')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 44,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Customer</th>\n",
       "      <th>Sales</th>\n",
       "      <th>Quantity</th>\n",
       "      <th>Discount</th>\n",
       "      <th>Profit</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Brosina Hoffman</td>\n",
       "      <td>1706.184</td>\n",
       "      <td>9.0</td>\n",
       "      <td>0.2</td>\n",
       "      <td>85.3092</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Brosina Hoffman</td>\n",
       "      <td>911.424</td>\n",
       "      <td>4.0</td>\n",
       "      <td>0.2</td>\n",
       "      <td>68.3568</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Zuschuss Donatelli</td>\n",
       "      <td>8.560</td>\n",
       "      <td>2.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>2.4824</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>Zuschuss Donatelli</td>\n",
       "      <td>NaN</td>\n",
       "      <td>3.0</td>\n",
       "      <td>0.2</td>\n",
       "      <td>16.0110</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>Zuschuss Donatelli</td>\n",
       "      <td>22.720</td>\n",
       "      <td>4.0</td>\n",
       "      <td>0.2</td>\n",
       "      <td>7.3840</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>Eric Hoffmann</td>\n",
       "      <td>11.648</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.2</td>\n",
       "      <td>4.2224</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>Eric Hoffmann</td>\n",
       "      <td>90.570</td>\n",
       "      <td>3.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>11.7741</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>Ruben Ausman</td>\n",
       "      <td>77.880</td>\n",
       "      <td>2.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>NaN</td>\n",
       "      <td>13.980</td>\n",
       "      <td>2.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>6.1512</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>Kunst Miller</td>\n",
       "      <td>25.824</td>\n",
       "      <td>6.0</td>\n",
       "      <td>0.2</td>\n",
       "      <td>9.3612</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>Kunst Miller</td>\n",
       "      <td>146.730</td>\n",
       "      <td>3.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>68.9631</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "              Customer     Sales  Quantity  Discount   Profit\n",
       "0      Brosina Hoffman  1706.184       9.0       0.2  85.3092\n",
       "1      Brosina Hoffman   911.424       4.0       0.2  68.3568\n",
       "2   Zuschuss Donatelli     8.560       2.0       0.0   2.4824\n",
       "3   Zuschuss Donatelli       NaN       3.0       0.2  16.0110\n",
       "4   Zuschuss Donatelli    22.720       4.0       0.2   7.3840\n",
       "5        Eric Hoffmann    11.648       NaN       0.2   4.2224\n",
       "6        Eric Hoffmann    90.570       3.0       0.0  11.7741\n",
       "7         Ruben Ausman    77.880       2.0       0.0      NaN\n",
       "8                  NaN    13.980       2.0       0.0   6.1512\n",
       "9         Kunst Miller    25.824       6.0       0.2   9.3612\n",
       "10        Kunst Miller   146.730       3.0       0.0  68.9631"
      ]
     },
     "execution_count": 44,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Keep only the columns that have at least 10 non-missing values\n",
    "df_missing.dropna(axis='columns', thresh=10)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Exercise 11: Outlier detection using a simple statistical test"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 45,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Work on an independent 50-row sample of four columns for the outlier exercise.\n",
    "# A fixed random_state makes the sampled rows identical on every Restart & Run All.\n",
    "df_sample = df[['Customer Name','State','Sales','Profit']].sample(n=50, random_state=42).copy()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 46,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/home/tirtha/.local/lib/python3.6/site-packages/pandas/core/indexing.py:202: SettingWithCopyWarning: \n",
      "A value is trying to be set on a copy of a slice from a DataFrame\n",
      "\n",
      "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
      "  self._setitem_with_indexer(indexer, value)\n"
     ]
    }
   ],
   "source": [
    "# Inject two obviously wrong (negative) Sales values, at row positions 5 and 15, to act as outliers.\n",
    "# One .iloc[rows, column] call writes directly into df_sample. The chained form\n",
    "# df_sample['Sales'].iloc[i] = ... assigns through an intermediate Series, which triggers\n",
    "# SettingWithCopyWarning and has no effect on df_sample once pandas Copy-on-Write is enabled.\n",
    "df_sample.iloc[[5, 15], df_sample.columns.get_loc('Sales')] = [-1000.0, -500.0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 47,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAZMAAAEPCAYAAACHuClZAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy8QZhcZAAAgAElEQVR4nO3dfZyVdZ3/8debQVTAGzJDTYVMWwemXd2wLSVjZFPpRlvLtbF215zfUrsL7mbl3WSB7Rh0o1uwrWJT6m6MWpo3GZHAjDWrtj/MUmJIf79EI82kBmEAQfCzf1zXwYvjGZiZ65w5MPN+Ph7nMXO+1/f6XN9z5przOd+b6xxFBGZmZnkMq3YDzMxs7+dkYmZmuTmZmJlZbk4mZmaWm5OJmZnl5mRiZma5OZnYDpJmSYrMbZOkxyRN3wPatbaP+4xI9zuhjO2olfQTSRvT52d8uWKXONZqSV+uVPyBImmGpD3i+gNJZ0vqlLRV0uq0LCTNyNSZLun9VWvkXmx4tRtge5wXgDPT30cB7wOul9QdEQur16w+GwF8DlgN/LxMMb8EHAycBWwEni1TXKswSTXAzcAi4O9J/n4AbweezFSdDqwA7hzQBg4CTiZWbFtEPJS5v1TSycD7gb0pmVTC8cDdEbG02g0xkLR/RGzuZfXDgQOBhRHRUSgsOtctBw9zWW9sAPbJFkh6g6Q7Ja2XtEHSPZKOzWw/V9LLkqZmysan9Zsz90PS+ZL+M43ze0mf212Ddnf8tM0A38oM243fRbwTJC1Nh/a6JH1b0thsO4E3Ap9IY7XvIlajpJWSNktaK+l+SRMz2+ekw4fdktakxzqsF4/5HWmsTZL+IOkGSQdkth8s6RuSnpH0oqSnJd2wm5jvkXRf+ryvl/SQpNOL6sxKH8eJ6fZNkh6R9I6ievtKmi9pnaQ/SrqWovOmhzYU4p8i6Wdp238uaXJRvdWSviLpSklrgPWZbX+dPqdbJP1GUrOk4em2C4DfpFXvSv9+s9JtO4a50r/pW4C/y5wzF+yu/ZaKCN98IyIAZgFrSXqsw0neyX0E2Ab8babevsCvgV8B5wEfIBka+C3wmky9VuCpNI6ANuAXwIh0+3gg0v2uB84AmoGXgX8qbldfjg/Up7E/D7wtve3bw+M+FFgHPEjSA/sIsAZ4lGS4bN90/2eBb6e/T+gh1qnAS8DlwBSSIbEvACdn6nwTaADeCXwwPe5KYFimzmrgy5n7pwBbgFuBdwN/kz7e7xbFXZU+J+9MH8eC3fzNZwAXpc/9u4BrgO3AKUXP/6b0+fgoMA14CHgeGJmpdy3wIvDJtM4d6fMYvTjvNpEMN00nGVptJ3lDcFjRc/IssCR9Xs9Jy09P/9Y3kQzRXpI+V9dl/r5/ldb5ZPr3OzLdFsCM9PcJQCdwb+acObTa/5d7y63qDfBtz7ml/9RR4vbVonofJ0kwx2TKjgS2Apdnyl4DPAO0pC9YW4A/y2wfn8b/UVH8G9IXymGZdq3ty/GB0WnsC3rxuOeQJJMDM2V/ke7fkClbTeYFvodYnwIe7sNzXgO8Pj3WqT0dC/gJ0Fa072npfnXp/RXAzBx//2EkbyIWA98scV6clik7IS07M71/CLAZuLQo3ip6l0wCOD9TNhr4IzCn6Dl5FtivaP+HSjw3l5AkxULSKJxr7y2qtyOZpPeXAzcO1P/cYLp5mMuKvQCclN4mA/9M0u3PDj29FfhZRPy6UBARa4D/TvcplP2RZLLzQpLJ66si4hcljvm9ovt3AEeQJIhSenX8PngrSULbMWwSET8lefHqa7yfAydKulbSqZJGFFeQNE3SA5JeIEmKa9JNbyoVUNJIkoni2yQNL9yADpJe0Fsyx/60pH+UVDJWidhHSrpJ0m/TtrxE8k6/eP+tJL2FgpXpz8Lf6M3AfsBdhQoR8XL2fi/sOA8iohu4j+Rvk7U0Il7MtL8G+HPgO0X1
biVJZm/vw/EtBycTK7YtIpant/+OiK8BVwFXSHpNWudw4LkS+z5H0hvJWpaWDyPpcZTy+x7uH95D/b4cvzfKFi8ilpAMBZ1K8uK7VtK/SxoFIOkk4G6SBPI3JC92b0t336+HsGNIejBfJ3mxL9y2kMxJHJXWm0GyCumzwK8kPSHpQz21VdKwtC0np/vUk7yJWFSiLRvS5FB4nFuL2lyY8+npb7k73fHqyfTf8+pzoPjv9FqS56C4vHC/P+eD9YNXc1lvdJLMHbyRZOjhWWBiiXpj0+1Zc0heCH8H/Btwfon9XtfD/Z6W3vbl+L3xbIk2FOI93NdgEXETcJOkQ4FzSOYSNgCXkYzdPw+cF+m4iqRxuwm5jmQ4ZhbwgxLbn0mPu45kOPEiSX9KMtTzbUmPRsTKEvsdC5wITIuIHxYKJe3fy4ea9bv05+vY+W9Q6nktZbRevTrrdbz6HCi+ZmUtSWItPs7Y9Gd/zgfrB/dMrDfq0p+FFTE/Bd4i6Q2FCpJeT/IOtyNTNgWYCfwD0Ag0SPpAifh/VXT/HJIXkTUl6vb2+MXvnHflp8AZRSujTiIZZ+/oaafdiYjnI+J6kvmOCWnx/sBLhUSS+vBu4mwkmRf4k0yvMXt7psQ+jwKfJvkfP76H0IWksaVQkCa2U3rx8Io9RjL5fnYm1rDs/V7YcR5IGk2yIOB/drVDRGwnSfjnFm36a5KFHA/24fiQnDe9OWesiHsmVmy4pMKwywiS8fjPAHdFROHd543ApcAiSZ8lmej8HMm7xOthx4vBN4FbI+K7adn1wH9I+nFEPJ855sR02+0kw0ONwD9nh1WK7Pb4EbFV0pPAX0taQfJC92hmeCbrGpKEt1jSXJLJ3zkkL5C37/4pe4Wk2SRDK+1pe04kWVl1WVrlPuBfJP0bcA9JAvxIL0JfQnLNz8vAd0l6OkcD7wGaIuJxSR0k8w4rSN7BFy7O6+kFeRVJwv6KpCuBA4DZJIsf+iQi/iBpATBb0jbgl+nxR/cyxGagOT1vniFZyDAC+Gov9v0cyd/uW8AtJPM3nwduSOfS+mIVyRuLM4A/AE9GxB/6GGNoqvYKAN/2nBuvXs21FXgCmAscUFT3GJLx+Q1AN/B94LjM9utJehfZpcKjSZb03p7eH58e58Mky4g3kAwBzQZU1K61fTl+Wud0kuWsL6bHGb+Lx34iyfzOJpJhpYXA2KI6q9n9aq73AkvTx/EiyfLly4oezyUkvbyNJMtcj+PVq4pedSySFWY/JLm+YiPJJPg1wEHp9i+RJMAN6WNoA96xm/aeRJJsNqd/6wtIkvXyXT3/aXlxm/clmdd5AegC5gEX07vVXGuBd5AsIthCsoT81KJ6PT7/JMuhHyM5Z9eQLDEfntleONd2t5rrmPRv8gK9XA3oW3JT+gSaDTglFxE+CbwvIr5f3dZYtaQXEM6IiNdWuy3Wf54zMTOz3JxMzMwsNw9zmZlZbu6ZmJlZbkN2afBrX/vaGD9+fLWbMShs3LiRUaNGVbsZZiX5/Cyvhx9+eG1EHFpcPmSTyfjx41m+fHm1mzEotLe3M2XKlGo3w6wkn5/lJempUuUe5jIzs9ycTMzMLDcnEzMzy83JxMzMcnMyMTOz3JxMrN9aW1upq6tj6tSp1NXV0draWu0mmVmVDNmlwZZPa2srTU1NtLS0sH37dmpqamhsbASgoaGhyq0zs4Hmnon1S3NzMy0tLdTX1zN8+HDq6+tpaWmhubm52k0zsypwMrF+6ezsZPLkyTuVTZ48mc7Oziq1yMyqycnE+qW2tpaOjp2/0bajo4Pa2toqtcjMqsnJxPqlqamJxsZG2tra2LZtG21tbTQ2NtLU1FTtpplZFXgC3vqlMMk+c+ZMOjs7qa2tpbm52ZPvZkOUk4n1W0NDAw0NDf4gPTPzMJeZmeXnZGJmZrk5mZiZWW5OJmZmlpuTiZmZ5eZkYmZmuTmZmJlZbk4mZmaWm5OJmZnl5mRiZma59SqZSDpW0vWS
HpW0XVJ7iTqSdIWk30jaLOnHkk4oUW+CpKWSNkl6RtJVkmoqFcvMzCqvtz2TicC7gV8Bj/dQ5zLgSmAu8D6gG1gi6bBCBUljgCVAAGcDVwGfBGZXMJaZmVVYb5PJPRFxVEScC/yyeKOk/UgSwBciYn5ELAHOJXmhn5Gp+nFgf+CciLgvIq4jefG/WNKB5Y5lZmYDo1fJJCJe3k2Vk4EDgdsy+2wE7gGmZepNAxZHxPpM2S0kSeGdFYhlZmYDoFwT8McD24Eniso7023ZequyFSLiaWBTpl45Y5mZ2QAo1/eZjAG6I2J7UXkXMFLSiIjYmtZbV2L/rnRbuWPtRNJ0YDrA2LFjaW9v3+0Ds93r7u72c2l7LJ+fA2NIfTlWRCwAFgBMmjQp/IVO5eEvx7I9mc/PgVGuYa4uYHSJZbljgE1pT6JQ76AS+49Jt5U7lpmZDYByJZNVQA1wbFF58bzGKormMyQdBYzM1CtnLDMzGwDlSiYPAOtJlvACIGkkyTUiizL1FgFnSDogU3YesBm4vwKxzMxsAPRqziR9MX93evf1wIGSPpje/0FEbJI0B7hSUhdJz+BikmQ1LxPqOuAi4A5Jc4FjgFnANYUlvhHxYrlimZnZwOjtBPzrgO8UlRXuvwFYDcwhecG/HDgEWA68KyKeK+wQEV2SpgLzSa4bWQdcS5IEssoZy8zMKqxXySQiVgPaTZ0AmtPbruqtBE4bqFhmZlZ5/tRgMzPLzcnEzMxyczIxM7PcnEzMzCw3JxMzM8vNycTMzHJzMjEzs9ycTMzMLDcnEzMzy83JxMzMcnMyMTOz3JxMzMwsNycTMzPLzcnEzMxyczIxM7PcnEzMzCw3JxMzM8vNycTMzHJzMjEzs9zKmkwkfUjSzyR1S/qtpJslHVFUR5KukPQbSZsl/VjSCSViTZC0VNImSc9IukpSTX9imZlZZZUtmUg6C2gFHgDOBi4FTgXulZQ9zmXAlcBc4H1AN7BE0mGZWGOAJUCksa4CPgnMLjrsbmOZmVnlDS9jrPOBn0XEjEKBpPXAXcCfAJ2S9iNJAF+IiPlpnQeB1cAM4DPprh8H9gfOiYj1wH2SDgRmSfpiRKzvQywzM6uwcg5z7QO8UFS2Lv2p9OfJwIHAbYUKEbERuAeYltlvGrA4TSQFt5AkmHf2MZaZmVVYOZPJN4F3SPpbSQdKehPwr8CyiFiZ1jke2A48UbRvZ7qNTL1V2QoR8TSwKVOvt7HMzKzCyjbMFRH3SroAaAFuSosfAM7KVBsDdEfE9qLdu4CRkkZExNa03jperSvd1pdYO0iaDkwHGDt2LO3t7X14hNaT7u5uP5e2x/L5OTDKlkwk1QPXAV8FFgFjgVnA9yT9ZYkX/QEXEQuABQCTJk2KKVOmVLdBg0R7ezt+Lm1P5fNzYJRzAv4rwN0RcWmhQNLPSYarzgbuIOk1jJZUU5RcxgCbMj2JLuCgEscYk24r1OlNLDMzq7ByzpkcD/w8WxARvwI2A29Mi1YBNcCxJfbNzpGsomjeQ9JRwMhMvd7GMjOzCitnMnkK+PNsgaRakhVYq9OiB4D1wLmZOiNJrhFZlNl1EXCGpAMyZeeRJKb7+xjLzMwqrJzDXNcB10p6hlfmTD5Lkkh+ABARL0qaA1wpqYukB3ExSVKbVxTrIuAOSXOBY0jmX64pLBfuQywzM6uwciaTrwFbgX8guehwHdABXJ5e/1Ewh+QF/3LgEGA58K6IeK5QISK6JE0F5pNcN7IOuJYkodCXWGZmVnnlXBocwH+kt93Va05vu6q3EjitHLHMzKyy/KnBZmaWm5OJmZnl5mRiZma5OZmYmVluTiZmZpabk4n1W2trK3V1dUydOpW6ujpaW1ur3SQzq5JyXmdiQ0hraytNTU20tLSwfft2ampqaGxsBKChoaHKrTOzgeaeifVLc3MzLS0t1NfXM3z4cOrr62lpaaG52Zf8mA1FTibWL52dnUyePHmnssmTJ9PZ2VmlFplZNTmZ
WL/U1tbS0dGxU1lHRwe1tbVVapGZVZOTifVLU1MTjY2NtLW1sW3bNtra2mhsbKSpqanaTTOzKvAEvPVLYZJ95syZdHZ2UltbS3NzsyffzYYoJxPrt4aGBhoaGvy1qGbmYS4zM8vPycTMzHJzMjEzs9ycTMzMLDcnEzMzy83JxMzMcitrMpE0XNJlkp6QtEXSGknXFtWRpCsk/UbSZkk/lnRCiVgTJC2VtEnSM5KuklTTn1hmZlZZ5e6Z3AhcBHwZOB24DNhcVOcy4EpgLvA+oBtYIumwQgVJY4AlQABnA1cBnwRm9zWWmZlVXtkuWpR0JnAe8GcRsbKHOvuRJIAvRMT8tOxBYDUwA/hMWvXjwP7AORGxHrhP0oHALElfjIj1fYhlZmYVVs6eyYXAsp4SSepk4EDgtkJBRGwE7gGmZepNAxaniaTgFpIE884+xjIzsworZzL5C+BxSfMlrU/nOu6QdESmzvHAduCJon07023ZequyFSLiaWBTpl5vY5mZWYWV87O5DgMuAH4BfAg4APgi8D1Jb4uIAMYA3RGxvWjfLmCkpBERsTWtt67EMbrSbfQh1g6SpgPTAcaOHUt7e3u/HqjtrLu728+l7bF8fg6MciYTpbezI+IPAJKeBe4HTgOWlvFY/RIRC4AFAJMmTQp/OGF5+IMebU/m83NglHOYqwt4rJBIUh3AVmBCps7o4iW+JL2MTZmeRBdwUIljjEm39SWWmZlVWDmTSSdJz6SYgJfT31cBNcCxRXWK50hWUTTvIekoYGSmXm9jmZlZhZUzmXwfeLOk12bKTgX2IZlHAXgAWA+cW6ggaSTJNSKLMvstAs6QdECm7DySa1bu72MsMzOrsHLOmSwguWDxHklXk0zAzwWWREQHQES8KGkOcKWkLpIexMUkSW1eJtZ1aaw7JM0FjgFmAdcUlgv3IZaZmVVY2ZJJeiHhacDXSK4J2QrcBXyiqOockhf8y4FDgOXAuyLiuUysLklTgfkk142sA64lSSh9imVmZpVX1q/tjYj/B7x7N3UCaE5vu6q3kmQVWO5YZmZWWf7UYDMzy83JxMzMcnMyMTOz3JxMzMwsNycTMzPLzcnEzMxyczIxM7PcnEzMzCw3JxMzM8vNycTMzHJzMjEzs9ycTMzMLDcnEzMzy83JxMzMcnMyMTOz3JxMzMwsNycTMzPLzcnEzMxyczIxM7PcnEzMzCy3iiQTSa+X1C0pJI3OlEvSFZJ+I2mzpB9LOqHE/hMkLZW0SdIzkq6SVFNUp1exzMys8irVM/kS0F2i/DLgSmAu8L60zhJJhxUqSBoDLAECOBu4CvgkMLuvsczMbGCUPZlIOhU4E/hyUfl+JAngCxExPyKWAOeSJI0ZmaofB/YHzomI+yLiOpJEcrGkA/sYy8zMBkBZk0k6FDWPpDextmjzycCBwG2FgojYCNwDTMvUmwYsjoj1mbJbSBLMO/sYy8yGqNbWVurq6pg6dSp1dXW0trZWu0mD2vAyx/s4sC/w78CHi7YdD2wHnigq7wTOK6q3LFshIp6WtCnddk8fYpnZENTa2kpTUxMtLS1s376dmpoaGhsbAWhoaKhy6wansiUTSYcAnwc+EhEvSSquMgbojojtReVdwEhJIyJia1pvXYlDdKXb+hKruI3TgekAY8eOpb29vdePz3rW3d3t59L2KFdccQUXXXQRknjxxRcZPXo0M2fO5IorruDwww+vdvMGpXL2TJqBhyLiB2WMWVYRsQBYADBp0qSYMmVKdRs0SLS3t+Pn0vYkTz/9NDNmzGCfffbZcX6ecsopfOpTn/K5WiFlmTORNBG4ELhK0sGSDgZGppsPkrQ/Sa9hdPESX5JexqZMT6ILOKjEYcak2wp1ehPLzIag2tpaOjo6dirr6Oigtra2Si0a/Mo1AX8csA/wIMkLfRfJvAnAGpJJ+VVADXBs0b7Hp9sKVqVlO0g6iiQ5rcrU6U0sMxuCmpqaaGxspK2tjW3bttHW1kZjYyNN
TU3VbtqgVa5hrg6gvqjsTOBS4N3Ar4GngPUkS3j/FUDSSJJrRBZk9lsEfFrSARGxIS07D9gM3J/ef6CXscxsCCpMss+cOZPOzk5qa2tpbm725HsFlSWZRMRaoD1bJml8+utPIqI7LZsDXCmpi6QHcTFJ72heZtfrgIuAOyTNBY4BZgHXFJYLR8SLvYxlZkNUQ0MDDQ0NntMbIOVeGrw7c0he8C8HDgGWA++KiOcKFSKiS9JUYD7JMuB1wLUkCaVPsczMbGBULJlExI3AjUVlQbLqq3k3+64ETttNnV7FMjOzyvOnBpuZWW5OJmZmlpuTiZmZ5eZkYmZmuTmZmJlZbk4mZmaWm5OJmZnl5mRiZma5OZmYmVluTiZmZpabk4mZmeXmZGJmZrk5mZiZWW5OJmZmlpuTiZmZ5eZkYmZmuTmZmJlZbk4mZjYotba2UldXx9SpU6mrq6O1tbXaTRrUBvo74M3MKq61tZWmpiZaWlrYvn07NTU1NDY2AtDQ0FDl1g1OZeuZSDpX0t2SfiupW9LDkl71V5P095KekPRiWmdqiTqvl/Q9SRskrZU0X9LI/sQys6GnubmZlpYW6uvrGT58OPX19bS0tNDc3Fztpg1a5RzmuhjoBj4BnAW0AQslzSxUSJPLdcDNwDTgl8D3JdVl6uwDLAbGAR8C/hk4F1iQPVhvYllleRjB9lSdnZ1Mnjx5p7LJkyfT2dlZpRYNfuUc5npfRKzN3F8m6QiSJDMvLZsF3BQRnweQdD9wInAZ8JG0zgeBWuDYiHgyrfcScIuk2RHxRB9iWYV4GMH2ZLW1tXR0dFBfX7+jrKOjg9ra2iq2anArW8+kKJEUPAIcASDpGOBNwG2ZfV4GvkPSsyiYBvzfQiJJ3QlsBc7sYyyrEA8j2J6sqamJxsZG2tra2LZtG21tbTQ2NtLU1FTtpg1alZ6AfzvwePr78enPVUV1OoHXSDo0Ip5P663MVoiIrZL+fyZGb2NZhXgYwfZkhd7xzJkz6ezspLa2lubmZveaK6hiySSdDH8/cGFaNCb9ua6oaldm+/Ppz+I6hXpjMnV7E6u4TdOB6QBjx46lvb19dw/DenD00Uczf/58TjzxRLq7u2lvb+eRRx7h6KOP9vNqe4TDDz+c+fPn093dzejRowF8blZQRZKJpPHAQuCuiLixEsfoj4hYQDqRP2nSpJgyZUp1G7QXu/rqq3fMmey3335EBPPmzePqq6/Gz6vtSdrb231ODoCyJxNJrwEWAU8BH85sKvQaDmLnHsWYou1daZ1iY4Bf9DGWVYiHEcwsq6xXwKfXgnwfGAG8NyI2ZTYX5jeOL9rteOCPmTmOVcV1JI0AjsnE6G0sq6CGhgZWrFjB0qVLWbFihROJ2RBWzosWh5OspjoOODMifp/dHhG/JpmMPzezz7D0/qJM1UXASZLGZcrOAvYFftjHWGZmNgDKOcz1deDdJBcZHiLpkMy2RyJiC8m1If8laTXw38DfkSSf8zN1vws0AXdIupJkKOtaYGHmGhN6GcvMzAZAOZPJ6enPr5bY9gZgdUS0ShoNXApcSXLV+nsjYkWhYkS8JOlMYD7JdSRbgFuAT2cD9iaWmZkNjLIlk4gY38t6NwA37KbOGpJlxbljmZlZ5fkj6M3MLDcnEzMzy83JxMzMcnMysX7zR9CbWYG/adH6xR9Bb2ZZ7plYv/gj6M0sy8nE+sUfQW9mWU4m1i+Fb7LL8jfZmQ1dnjOxfmlqauK8885j1KhRPP300xx99NFs3LiRr3611AcgmNlg556J5RYR1W6CmVWZk4n1S3NzM7feeitPPvkky5Yt48knn+TWW2/1BLzZEOVkYv3S2dnJmjVrdrrOZM2aNZ6ANxuiPGdi/XLEEUdwySWXsHDhwh3XmZx//vkcccQR1W6aGZBcC9Xc3Lzjm0Cbmpp8DVQFOZlYv0na5X2zavFFtVUQEUPy9pa3vCWs/4YNGxY333xzTJw4MYYNGxYTJ06Mm2++OYYN
G1btppnFxIkTY9myZRER0dbWFhERy5Yti4kTJ1axVYMDsDxKvKZ6zsT6pba2liOPPHKn74A/8sgjfZ2J7RF8Ue3A8zCX9VrxMNZpp53Wq3rhpcM2wGpra5k9ezZ33nnnjjmT97///X6zU0HumVivFXdrFy5cyMSJE0HDmDhxIgsXLiw5pGg20Orr65k7dy4XXngh9957LxdeeCFz586lvr6+2k0btDRU/9knTZoUy5cvr3YzBoXxl93L6jnvqXYzzHaoq6vjuOOOY9GiRWzZsoV9992XadOm8cQTT7BixYpqN2+vJunhiJhUXO5hLjMbdFauXMnGjRtZtGjRjtVcF154IU899VS1mzZo7dXJRNIEYB7wdmAd8A1gdkRsr2rDzKyqRowYwYgRI5g6dSoRgSSOO+44RowYUe2mDVp7bTKRNAZYAqwEzgbeCHyFZB7oM1Vs2l7vz2b/iBc2v9SnfcZfdm+f6h+0/z784nOn92kfs97asmULjz/+OGeddRYf/ehH+da3vsXdd99d7WYNanttMgE+DuwPnBMR64H7JB0IzJL0xbTM+uGFzS/1aQ6kvb2dKVOm9OkYfU0+Zn01btw4Fi9ezN13382+++7LuHHjPMxVQXvzaq5pwOKipHELSYJ5Z3WaZGZ7iqeeeoqDDz4YSRx88MFOJBW2167mkvR74OsRMauofCMwKyK+tKv9vZqrZ2++6c0DcpzH/u6xATmODR4+N6tvMK7mGkMy6V6sK932KpKmA9MBxo4dS3t7e8UatzebN25eyfL+rtFva2srWe7n3/pqQ+ecHrc9Nfe9fY437tLvv6ps1D4+N/tjb04mfRYRC4AFkPRM+jrOP9T11Ivtz5yJWX+snrKLjXNeOT8lsc8++/DSS68sJCnc31tHY/Z0e3My6QIOKlE+Jt1mZkPU8OHDGT16NLfffvuO60w+8IEPsGHDhmo3bdDam5PJKuD4bIGko4CR6TYzG6KKL1QcN24cNTU1bN/uS9AqZW9ezbUIOEPSAZmy84DNwP3VaZKZ7QkmTJjAxz72MUaNGoUkRo0axcc+9jEmTJhQ7aYNWntzMrkO2ALcIekv08n1WcA1vsZkYLS2tu70tb2tra3VbpIZAE1NTSxcuJB58+axeBkc0NMAAAZWSURBVPFi5s2bx8KFC2lqaqp20watvXaYKyK6JE0F5gP3kKzsupYkoViF+ZvsbE9WOAdnzpy54yPom5ubfW5W0F57nUlevs4kn7q6OubNm0d9ff2O1VxtbW3MnDnTn8pqexSvNiyvnq4z2ZuHuayK/E12ZpblZGL9UltbS0dHx05lHR0d/iY7syHKycT6pampicbGRtra2ti2bRttbW00NjZ6gtNsiNprJ+CtujzBaWZZTibWbw0NDTQ0NHiC08w8zGX95+tMzKzAPRPrF19nYmZZ7plYvzQ3N9PS0kJ9fT3Dhw+nvr6elpYWmpubq900M6sCJxPrF19nYmZZTibWL77OxMyynEysX3ydiZlleQLe+sXXmZhZlpOJ9ZuvMzGzAg9zmZlZbk4mZmaWm5OJmZnl5mRiZma5OZmYmVluQ/ZreyU9DzxV7XYMEq8F1la7EWY98PlZXuMi4tDiwiGbTKx8JC0v9Z3QZnsCn58Dw8NcZmaWm5OJmZnl5mRi5bCg2g0w2wWfnwPAcyZmZpabeyZmZpabk4mZmeXmZDKESbpA0sOSNkjqkvSIpGv6EWe1pC9Xoo02NEmaJSkyt2ck3S7pjWWKXyvpJ5I2pvHHS2qX9N1MndMl/Us5jjcUOJkMUZIuB74BLAbOAf4WuAs4q5rtMst4AXh7evsUcAKwVNKoMsT+EnAwyfn+duBZ4B+ByzN1TgecTHrJ32cydM0Aro+IKzJl90iaXa0GmRXZFhEPpb8/JOlp4CfAu4HvZCtKqgFqImJrL2MfD9wdEUszZSvzNngoc89k6DoY+F1xYRQt75M0R9JjkrolrZH0bUmH7S64pHdIul/SJkl/kHSD
pAMy2w+W9I10+OJFSU9LuqEsj8wGq4fTn+Ml3ShpuaT3S/ol8CLwFwCSTpC0ND33utJzdmy6bbykAN4IfCId4mpPt+0Y5pI0C/gkMC4z1HbjQD7YvY17JkPXz4CZ6bu970fEH3qo9zrgauAZ4FCSf7Blkuoi4uVSO0g6BVgC3Al8EDgEmAOMSe8DXAOcDHyCJKkdBZxahsdlg9f49OfvgInp/S8CV6VlT0o6FGgHOoHzgdEk5959kiaRDGe9HfgesAyYB6wvcaxvAMcBpwF/lZY9X+bHM6g4mQxd/0TyYn8jEJI6gduBL0fEjn+uiLiw8Hs6lPAgsAaYDPy4h9hzgAci4rzMvr8lGe+ui4gVwFuBf4+IWzP7/Vc5HpgNHpIKr1HHAF8HNpC8UZlK8iblLyPi55n6c9Jfzyicx5KeAB4CPhARrSRDZluAZzPDaDuJiDWSngW29FTHduZhriEqIh4FakkmIL8OCLgSWC5pdKGepGmSHpD0ArCNJJEAvKlUXEkjSd753SZpeOEGdAAvAW9Jq/4c+LSkf5RUMpYNeYeQnDMvAb8iSSjnRcSz6fbfZhNJ6q3Aj4reEP0UWE3yBsgqxMlkCIuILRFxT0TMiIgJwP8h6do3Akg6CbibJIH8DUmSeFu6+349hB0D1JAkqJcyty3APiTDWZAsALgT+CzwK0lPSPpQeR+h7eVeAE4CJgFHAuMjYlFm+3Ml9jm8h/LngNeUvYW2g4e5bIeIaJH0RZKVLpCMFT9P8m4wACSN202YdUAAs4AflNj+THqsdcBFwEWS/hS4BPi2pEcjwqtqDJLVXMt3sb3UZ0E9SzLPV2wsr0zgWwW4ZzJESXrVP1w6eXkQr7yz2x94qWiF14d3FTciNpKMT/9JRCwvcXumxD6PAp8mOR+PL95u1gc/Bc4oWjl4EslkfUcfY22l5x64FXHPZOh6TNJdwI+A3wPjSC4M2wTclNa5D/gXSf8G3EOy+uojvYh9Cclk+8vAd0kmTY8G3gM0RcTjkjpIVtSsIHmH+ffARuB/yvPwbIi6BvgHYLGkubyymusxkgUmfbEKGCvpApLzdG1ErC5fUwcXJ5Oh6yrgbOBrJGPJvwMeIBnSehIgIn4g6VJgJsmL/YPAe4HHdxU4IjoknQrMBv6TZA7lKeCHvNLreRC4gOQd43bgEWBaRKwpjmfWWxHxvKR64CtAK0nv4gfAJ/pwQWPBbUA9yfLjQ0neZF1QvtYOLv4IejMzy81zJmZmlpuTiZmZ5eZkYmZmuTmZmJlZbk4mZmaWm5OJmZnl5mRiZma5OZmYmVlu/wuHKSknfWNGWgAAAABJRU5ErkJggg==\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Box plot of the sampled columns, styled through the Axes object that\n",
    "# pandas returns instead of pyplot's implicit \"current axes\" state.\n",
    "ax = df_sample.plot.box()\n",
    "ax.set_title(\"Boxplot of sales and profit\", fontsize=15)\n",
    "ax.tick_params(axis=\"both\", labelsize=15)  # x and y tick labels share one size\n",
    "ax.grid(True)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Exercise 12: Concatenation"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 48,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Draw three independent 4-row samples to use in the concatenation demo.\n",
    "# Each draw gets its own fixed random_state so that the same rows come back\n",
    "# on every 'Restart Kernel -> Run All'. The seeds must differ: reusing one\n",
    "# seed would select the same four rows three times.\n",
    "df_1 = df[['Customer Name','State','Sales','Profit']].sample(n=4, random_state=1)\n",
    "df_2 = df[['Customer Name','State','Sales','Profit']].sample(n=4, random_state=2)\n",
    "df_3 = df[['Customer Name','State','Sales','Profit']].sample(n=4, random_state=3)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 49,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Customer Name</th>\n",
       "      <th>State</th>\n",
       "      <th>Sales</th>\n",
       "      <th>Profit</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>1639</th>\n",
       "      <td>Toby Braunhardt</td>\n",
       "      <td>District of Columbia</td>\n",
       "      <td>9.640</td>\n",
       "      <td>4.4344</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6372</th>\n",
       "      <td>Chuck Clark</td>\n",
       "      <td>New York</td>\n",
       "      <td>6.630</td>\n",
       "      <td>3.1161</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7674</th>\n",
       "      <td>Matt Abelman</td>\n",
       "      <td>Massachusetts</td>\n",
       "      <td>446.068</td>\n",
       "      <td>0.0000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4530</th>\n",
       "      <td>Maria Bertelson</td>\n",
       "      <td>New York</td>\n",
       "      <td>408.006</td>\n",
       "      <td>72.5344</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "        Customer Name                 State    Sales   Profit\n",
       "1639  Toby Braunhardt  District of Columbia    9.640   4.4344\n",
       "6372      Chuck Clark              New York    6.630   3.1161\n",
       "7674     Matt Abelman         Massachusetts  446.068   0.0000\n",
       "4530  Maria Bertelson              New York  408.006  72.5344"
      ]
     },
     "execution_count": 49,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Preview the first sample; rows keep their original index labels from df.\n",
    "df_1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 50,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Customer Name</th>\n",
       "      <th>State</th>\n",
       "      <th>Sales</th>\n",
       "      <th>Profit</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>457</th>\n",
       "      <td>Natalie Fritzler</td>\n",
       "      <td>Illinois</td>\n",
       "      <td>747.558</td>\n",
       "      <td>-96.1146</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2796</th>\n",
       "      <td>Kean Nguyen</td>\n",
       "      <td>California</td>\n",
       "      <td>308.499</td>\n",
       "      <td>-18.1470</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6233</th>\n",
       "      <td>Jennifer Ferguson</td>\n",
       "      <td>Washington</td>\n",
       "      <td>71.280</td>\n",
       "      <td>34.2144</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2499</th>\n",
       "      <td>Dan Campbell</td>\n",
       "      <td>Florida</td>\n",
       "      <td>37.608</td>\n",
       "      <td>12.6927</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "          Customer Name       State    Sales   Profit\n",
       "457    Natalie Fritzler    Illinois  747.558 -96.1146\n",
       "2796        Kean Nguyen  California  308.499 -18.1470\n",
       "6233  Jennifer Ferguson  Washington   71.280  34.2144\n",
       "2499       Dan Campbell     Florida   37.608  12.6927"
      ]
     },
     "execution_count": 50,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Preview the second sample (same four columns as df_1).\n",
    "df_2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 51,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Customer Name</th>\n",
       "      <th>State</th>\n",
       "      <th>Sales</th>\n",
       "      <th>Profit</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>2063</th>\n",
       "      <td>Greg Guthrie</td>\n",
       "      <td>California</td>\n",
       "      <td>19.296</td>\n",
       "      <td>6.0300</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2256</th>\n",
       "      <td>Laura Armstrong</td>\n",
       "      <td>Maine</td>\n",
       "      <td>109.480</td>\n",
       "      <td>33.9388</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9192</th>\n",
       "      <td>Barry Blumstein</td>\n",
       "      <td>Texas</td>\n",
       "      <td>29.372</td>\n",
       "      <td>-46.9952</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4466</th>\n",
       "      <td>Kelly Andreada</td>\n",
       "      <td>New York</td>\n",
       "      <td>16.380</td>\n",
       "      <td>7.3710</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "        Customer Name       State    Sales   Profit\n",
       "2063     Greg Guthrie  California   19.296   6.0300\n",
       "2256  Laura Armstrong       Maine  109.480  33.9388\n",
       "9192  Barry Blumstein       Texas   29.372 -46.9952\n",
       "4466   Kelly Andreada    New York   16.380   7.3710"
      ]
     },
     "execution_count": 51,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Preview the third sample (same four columns as df_1).\n",
    "df_3"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 52,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Customer Name</th>\n",
       "      <th>State</th>\n",
       "      <th>Sales</th>\n",
       "      <th>Profit</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>1639</th>\n",
       "      <td>Toby Braunhardt</td>\n",
       "      <td>District of Columbia</td>\n",
       "      <td>9.640</td>\n",
       "      <td>4.4344</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6372</th>\n",
       "      <td>Chuck Clark</td>\n",
       "      <td>New York</td>\n",
       "      <td>6.630</td>\n",
       "      <td>3.1161</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7674</th>\n",
       "      <td>Matt Abelman</td>\n",
       "      <td>Massachusetts</td>\n",
       "      <td>446.068</td>\n",
       "      <td>0.0000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4530</th>\n",
       "      <td>Maria Bertelson</td>\n",
       "      <td>New York</td>\n",
       "      <td>408.006</td>\n",
       "      <td>72.5344</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>457</th>\n",
       "      <td>Natalie Fritzler</td>\n",
       "      <td>Illinois</td>\n",
       "      <td>747.558</td>\n",
       "      <td>-96.1146</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2796</th>\n",
       "      <td>Kean Nguyen</td>\n",
       "      <td>California</td>\n",
       "      <td>308.499</td>\n",
       "      <td>-18.1470</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6233</th>\n",
       "      <td>Jennifer Ferguson</td>\n",
       "      <td>Washington</td>\n",
       "      <td>71.280</td>\n",
       "      <td>34.2144</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2499</th>\n",
       "      <td>Dan Campbell</td>\n",
       "      <td>Florida</td>\n",
       "      <td>37.608</td>\n",
       "      <td>12.6927</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2063</th>\n",
       "      <td>Greg Guthrie</td>\n",
       "      <td>California</td>\n",
       "      <td>19.296</td>\n",
       "      <td>6.0300</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2256</th>\n",
       "      <td>Laura Armstrong</td>\n",
       "      <td>Maine</td>\n",
       "      <td>109.480</td>\n",
       "      <td>33.9388</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9192</th>\n",
       "      <td>Barry Blumstein</td>\n",
       "      <td>Texas</td>\n",
       "      <td>29.372</td>\n",
       "      <td>-46.9952</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4466</th>\n",
       "      <td>Kelly Andreada</td>\n",
       "      <td>New York</td>\n",
       "      <td>16.380</td>\n",
       "      <td>7.3710</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "          Customer Name                 State    Sales   Profit\n",
       "1639    Toby Braunhardt  District of Columbia    9.640   4.4344\n",
       "6372        Chuck Clark              New York    6.630   3.1161\n",
       "7674       Matt Abelman         Massachusetts  446.068   0.0000\n",
       "4530    Maria Bertelson              New York  408.006  72.5344\n",
       "457    Natalie Fritzler              Illinois  747.558 -96.1146\n",
       "2796        Kean Nguyen            California  308.499 -18.1470\n",
       "6233  Jennifer Ferguson            Washington   71.280  34.2144\n",
       "2499       Dan Campbell               Florida   37.608  12.6927\n",
       "2063       Greg Guthrie            California   19.296   6.0300\n",
       "2256    Laura Armstrong                 Maine  109.480  33.9388\n",
       "9192    Barry Blumstein                 Texas   29.372 -46.9952\n",
       "4466     Kelly Andreada              New York   16.380   7.3710"
      ]
     },
     "execution_count": 52,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Row-wise concatenation: stack the three samples on top of each other.\n",
    "# Every row keeps the index label it had in its source frame.\n",
    "df_cat1 = pd.concat([df_1, df_2, df_3], axis='index')\n",
    "df_cat1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 53,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Customer Name</th>\n",
       "      <th>State</th>\n",
       "      <th>Sales</th>\n",
       "      <th>Profit</th>\n",
       "      <th>Customer Name</th>\n",
       "      <th>State</th>\n",
       "      <th>Sales</th>\n",
       "      <th>Profit</th>\n",
       "      <th>Customer Name</th>\n",
       "      <th>State</th>\n",
       "      <th>Sales</th>\n",
       "      <th>Profit</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>457</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Natalie Fritzler</td>\n",
       "      <td>Illinois</td>\n",
       "      <td>747.558</td>\n",
       "      <td>-96.1146</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1639</th>\n",
       "      <td>Toby Braunhardt</td>\n",
       "      <td>District of Columbia</td>\n",
       "      <td>9.640</td>\n",
       "      <td>4.4344</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2063</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Greg Guthrie</td>\n",
       "      <td>California</td>\n",
       "      <td>19.296</td>\n",
       "      <td>6.0300</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2256</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Laura Armstrong</td>\n",
       "      <td>Maine</td>\n",
       "      <td>109.480</td>\n",
       "      <td>33.9388</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2499</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Dan Campbell</td>\n",
       "      <td>Florida</td>\n",
       "      <td>37.608</td>\n",
       "      <td>12.6927</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2796</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Kean Nguyen</td>\n",
       "      <td>California</td>\n",
       "      <td>308.499</td>\n",
       "      <td>-18.1470</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4466</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Kelly Andreada</td>\n",
       "      <td>New York</td>\n",
       "      <td>16.380</td>\n",
       "      <td>7.3710</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4530</th>\n",
       "      <td>Maria Bertelson</td>\n",
       "      <td>New York</td>\n",
       "      <td>408.006</td>\n",
       "      <td>72.5344</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6233</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Jennifer Ferguson</td>\n",
       "      <td>Washington</td>\n",
       "      <td>71.280</td>\n",
       "      <td>34.2144</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6372</th>\n",
       "      <td>Chuck Clark</td>\n",
       "      <td>New York</td>\n",
       "      <td>6.630</td>\n",
       "      <td>3.1161</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7674</th>\n",
       "      <td>Matt Abelman</td>\n",
       "      <td>Massachusetts</td>\n",
       "      <td>446.068</td>\n",
       "      <td>0.0000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9192</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Barry Blumstein</td>\n",
       "      <td>Texas</td>\n",
       "      <td>29.372</td>\n",
       "      <td>-46.9952</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "        Customer Name                 State    Sales   Profit  \\\n",
       "457               NaN                   NaN      NaN      NaN   \n",
       "1639  Toby Braunhardt  District of Columbia    9.640   4.4344   \n",
       "2063              NaN                   NaN      NaN      NaN   \n",
       "2256              NaN                   NaN      NaN      NaN   \n",
       "2499              NaN                   NaN      NaN      NaN   \n",
       "2796              NaN                   NaN      NaN      NaN   \n",
       "4466              NaN                   NaN      NaN      NaN   \n",
       "4530  Maria Bertelson              New York  408.006  72.5344   \n",
       "6233              NaN                   NaN      NaN      NaN   \n",
       "6372      Chuck Clark              New York    6.630   3.1161   \n",
       "7674     Matt Abelman         Massachusetts  446.068   0.0000   \n",
       "9192              NaN                   NaN      NaN      NaN   \n",
       "\n",
       "          Customer Name       State    Sales   Profit    Customer Name  \\\n",
       "457    Natalie Fritzler    Illinois  747.558 -96.1146              NaN   \n",
       "1639                NaN         NaN      NaN      NaN              NaN   \n",
       "2063                NaN         NaN      NaN      NaN     Greg Guthrie   \n",
       "2256                NaN         NaN      NaN      NaN  Laura Armstrong   \n",
       "2499       Dan Campbell     Florida   37.608  12.6927              NaN   \n",
       "2796        Kean Nguyen  California  308.499 -18.1470              NaN   \n",
       "4466                NaN         NaN      NaN      NaN   Kelly Andreada   \n",
       "4530                NaN         NaN      NaN      NaN              NaN   \n",
       "6233  Jennifer Ferguson  Washington   71.280  34.2144              NaN   \n",
       "6372                NaN         NaN      NaN      NaN              NaN   \n",
       "7674                NaN         NaN      NaN      NaN              NaN   \n",
       "9192                NaN         NaN      NaN      NaN  Barry Blumstein   \n",
       "\n",
       "           State    Sales   Profit  \n",
       "457          NaN      NaN      NaN  \n",
       "1639         NaN      NaN      NaN  \n",
       "2063  California   19.296   6.0300  \n",
       "2256       Maine  109.480  33.9388  \n",
       "2499         NaN      NaN      NaN  \n",
       "2796         NaN      NaN      NaN  \n",
       "4466    New York   16.380   7.3710  \n",
       "4530         NaN      NaN      NaN  \n",
       "6233         NaN      NaN      NaN  \n",
       "6372         NaN      NaN      NaN  \n",
       "7674         NaN      NaN      NaN  \n",
       "9192       Texas   29.372 -46.9952  "
      ]
     },
     "execution_count": 53,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Column-wise concatenation: the frames are placed side by side and aligned\n",
    "# on their row index, so a row that is absent from a frame shows NaN in that\n",
    "# frame's columns.\n",
    "df_cat2 = pd.concat([df_1, df_2, df_3], axis='columns')\n",
    "df_cat2"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Exercise 13: Merging by a common key"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 54,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Customer Name</th>\n",
       "      <th>Ship Date</th>\n",
       "      <th>Ship Mode</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Claire Gute</td>\n",
       "      <td>2016-11-11</td>\n",
       "      <td>Second Class</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Claire Gute</td>\n",
       "      <td>2016-11-11</td>\n",
       "      <td>Second Class</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Darrin Van Huff</td>\n",
       "      <td>2016-06-16</td>\n",
       "      <td>Second Class</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>Sean O'Donnell</td>\n",
       "      <td>2015-10-18</td>\n",
       "      <td>Standard Class</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "     Customer Name  Ship Date       Ship Mode\n",
       "0      Claire Gute 2016-11-11    Second Class\n",
       "1      Claire Gute 2016-11-11    Second Class\n",
       "2  Darrin Van Huff 2016-06-16    Second Class\n",
       "3   Sean O'Donnell 2015-10-18  Standard Class"
      ]
     },
     "execution_count": 54,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Left table for the merge demo: shipping columns of the first four rows.\n",
    "# NOTE: this rebinds df_1, which held a random sample in Exercise 12.\n",
    "df_1 = df[['Customer Name', 'Ship Date', 'Ship Mode']].head(4)\n",
    "df_1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 55,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Customer Name</th>\n",
       "      <th>Product Name</th>\n",
       "      <th>Quantity</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Claire Gute</td>\n",
       "      <td>Bush Somerset Collection Bookcase</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Claire Gute</td>\n",
       "      <td>Hon Deluxe Fabric Upholstered Stacking Chairs,...</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Darrin Van Huff</td>\n",
       "      <td>Self-Adhesive Address Labels for Typewriters b...</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>Sean O'Donnell</td>\n",
       "      <td>Bretford CR4500 Series Slim Rectangular Table</td>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "     Customer Name                                       Product Name  \\\n",
       "0      Claire Gute                  Bush Somerset Collection Bookcase   \n",
       "1      Claire Gute  Hon Deluxe Fabric Upholstered Stacking Chairs,...   \n",
       "2  Darrin Van Huff  Self-Adhesive Address Labels for Typewriters b...   \n",
       "3   Sean O'Donnell      Bretford CR4500 Series Slim Rectangular Table   \n",
       "\n",
       "   Quantity  \n",
       "0         2  \n",
       "1         3  \n",
       "2         2  \n",
       "3         5  "
      ]
     },
     "execution_count": 55,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Right table for the merge demo: product columns of the same first four rows.\n",
    "# NOTE: this rebinds df_2, which held a random sample in Exercise 12.\n",
    "df_2 = df[['Customer Name', 'Product Name', 'Quantity']].head(4)\n",
    "df_2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 56,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Customer Name</th>\n",
       "      <th>Ship Date</th>\n",
       "      <th>Ship Mode</th>\n",
       "      <th>Product Name</th>\n",
       "      <th>Quantity</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Claire Gute</td>\n",
       "      <td>2016-11-11</td>\n",
       "      <td>Second Class</td>\n",
       "      <td>Bush Somerset Collection Bookcase</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Claire Gute</td>\n",
       "      <td>2016-11-11</td>\n",
       "      <td>Second Class</td>\n",
       "      <td>Hon Deluxe Fabric Upholstered Stacking Chairs,...</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Claire Gute</td>\n",
       "      <td>2016-11-11</td>\n",
       "      <td>Second Class</td>\n",
       "      <td>Bush Somerset Collection Bookcase</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>Claire Gute</td>\n",
       "      <td>2016-11-11</td>\n",
       "      <td>Second Class</td>\n",
       "      <td>Hon Deluxe Fabric Upholstered Stacking Chairs,...</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>Darrin Van Huff</td>\n",
       "      <td>2016-06-16</td>\n",
       "      <td>Second Class</td>\n",
       "      <td>Self-Adhesive Address Labels for Typewriters b...</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>Sean O'Donnell</td>\n",
       "      <td>2015-10-18</td>\n",
       "      <td>Standard Class</td>\n",
       "      <td>Bretford CR4500 Series Slim Rectangular Table</td>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "     Customer Name  Ship Date       Ship Mode  \\\n",
       "0      Claire Gute 2016-11-11    Second Class   \n",
       "1      Claire Gute 2016-11-11    Second Class   \n",
       "2      Claire Gute 2016-11-11    Second Class   \n",
       "3      Claire Gute 2016-11-11    Second Class   \n",
       "4  Darrin Van Huff 2016-06-16    Second Class   \n",
       "5   Sean O'Donnell 2015-10-18  Standard Class   \n",
       "\n",
       "                                        Product Name  Quantity  \n",
       "0                  Bush Somerset Collection Bookcase         2  \n",
       "1  Hon Deluxe Fabric Upholstered Stacking Chairs,...         3  \n",
       "2                  Bush Somerset Collection Bookcase         2  \n",
       "3  Hon Deluxe Fabric Upholstered Stacking Chairs,...         3  \n",
       "4  Self-Adhesive Address Labels for Typewriters b...         2  \n",
       "5      Bretford CR4500 Series Slim Rectangular Table         5  "
      ]
     },
     "execution_count": 56,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Inner join on the shared key. 'Customer Name' is not unique in either table,\n",
    "# so a customer's rows are paired in every combination (2 x 2 = 4 rows for\n",
    "# Claire Gute), which is why the result has more rows than either input.\n",
    "df_1.merge(df_2, on='Customer Name', how='inner')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 57,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Customer Name</th>\n",
       "      <th>Ship Date</th>\n",
       "      <th>Ship Mode</th>\n",
       "      <th>Product Name</th>\n",
       "      <th>Quantity</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Claire Gute</td>\n",
       "      <td>2016-11-11</td>\n",
       "      <td>Second Class</td>\n",
       "      <td>Bush Somerset Collection Bookcase</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Claire Gute</td>\n",
       "      <td>2016-11-11</td>\n",
       "      <td>Second Class</td>\n",
       "      <td>Hon Deluxe Fabric Upholstered Stacking Chairs,...</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>Darrin Van Huff</td>\n",
       "      <td>2016-06-16</td>\n",
       "      <td>Second Class</td>\n",
       "      <td>Self-Adhesive Address Labels for Typewriters b...</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>Sean O'Donnell</td>\n",
       "      <td>2015-10-18</td>\n",
       "      <td>Standard Class</td>\n",
       "      <td>Bretford CR4500 Series Slim Rectangular Table</td>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "     Customer Name  Ship Date       Ship Mode  \\\n",
       "0      Claire Gute 2016-11-11    Second Class   \n",
       "1      Claire Gute 2016-11-11    Second Class   \n",
       "4  Darrin Van Huff 2016-06-16    Second Class   \n",
       "5   Sean O'Donnell 2015-10-18  Standard Class   \n",
       "\n",
       "                                        Product Name  Quantity  \n",
       "0                  Bush Somerset Collection Bookcase         2  \n",
       "1  Hon Deluxe Fabric Upholstered Stacking Chairs,...         3  \n",
       "4  Self-Adhesive Address Labels for Typewriters b...         2  \n",
       "5      Bretford CR4500 Series Slim Rectangular Table         5  "
      ]
     },
     "execution_count": 57,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pd.merge(df_1,df_2,on='Customer Name',how='inner').drop_duplicates()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 58,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Customer Name</th>\n",
       "      <th>Product Name</th>\n",
       "      <th>Quantity</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Darrin Van Huff</td>\n",
       "      <td>Self-Adhesive Address Labels for Typewriters b...</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>Sean O'Donnell</td>\n",
       "      <td>Bretford CR4500 Series Slim Rectangular Table</td>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>Sean O'Donnell</td>\n",
       "      <td>Eldon Fold 'N Roll Cart System</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>Brosina Hoffman</td>\n",
       "      <td>Eldon Expressions Wood and Plastic Desk Access...</td>\n",
       "      <td>7</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "     Customer Name                                       Product Name  \\\n",
       "2  Darrin Van Huff  Self-Adhesive Address Labels for Typewriters b...   \n",
       "3   Sean O'Donnell      Bretford CR4500 Series Slim Rectangular Table   \n",
       "4   Sean O'Donnell                     Eldon Fold 'N Roll Cart System   \n",
       "5  Brosina Hoffman  Eldon Expressions Wood and Plastic Desk Access...   \n",
       "\n",
       "   Quantity  \n",
       "2         2  \n",
       "3         5  \n",
       "4         2  \n",
       "5         7  "
      ]
     },
     "execution_count": 58,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_3=df[['Customer Name','Product Name','Quantity']][2:6]\n",
    "df_3"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 59,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Customer Name</th>\n",
       "      <th>Ship Date</th>\n",
       "      <th>Ship Mode</th>\n",
       "      <th>Product Name</th>\n",
       "      <th>Quantity</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Darrin Van Huff</td>\n",
       "      <td>2016-06-16</td>\n",
       "      <td>Second Class</td>\n",
       "      <td>Self-Adhesive Address Labels for Typewriters b...</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Sean O'Donnell</td>\n",
       "      <td>2015-10-18</td>\n",
       "      <td>Standard Class</td>\n",
       "      <td>Bretford CR4500 Series Slim Rectangular Table</td>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Sean O'Donnell</td>\n",
       "      <td>2015-10-18</td>\n",
       "      <td>Standard Class</td>\n",
       "      <td>Eldon Fold 'N Roll Cart System</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "     Customer Name  Ship Date       Ship Mode  \\\n",
       "0  Darrin Van Huff 2016-06-16    Second Class   \n",
       "1   Sean O'Donnell 2015-10-18  Standard Class   \n",
       "2   Sean O'Donnell 2015-10-18  Standard Class   \n",
       "\n",
       "                                        Product Name  Quantity  \n",
       "0  Self-Adhesive Address Labels for Typewriters b...         2  \n",
       "1      Bretford CR4500 Series Slim Rectangular Table         5  \n",
       "2                     Eldon Fold 'N Roll Cart System         2  "
      ]
     },
     "execution_count": 59,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pd.merge(df_1,df_3,on='Customer Name',how='inner').drop_duplicates()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 60,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Customer Name</th>\n",
       "      <th>Ship Date</th>\n",
       "      <th>Ship Mode</th>\n",
       "      <th>Product Name</th>\n",
       "      <th>Quantity</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Claire Gute</td>\n",
       "      <td>2016-11-11</td>\n",
       "      <td>Second Class</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Darrin Van Huff</td>\n",
       "      <td>2016-06-16</td>\n",
       "      <td>Second Class</td>\n",
       "      <td>Self-Adhesive Address Labels for Typewriters b...</td>\n",
       "      <td>2.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>Sean O'Donnell</td>\n",
       "      <td>2015-10-18</td>\n",
       "      <td>Standard Class</td>\n",
       "      <td>Bretford CR4500 Series Slim Rectangular Table</td>\n",
       "      <td>5.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>Sean O'Donnell</td>\n",
       "      <td>2015-10-18</td>\n",
       "      <td>Standard Class</td>\n",
       "      <td>Eldon Fold 'N Roll Cart System</td>\n",
       "      <td>2.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>Brosina Hoffman</td>\n",
       "      <td>NaT</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Eldon Expressions Wood and Plastic Desk Access...</td>\n",
       "      <td>7.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "     Customer Name  Ship Date       Ship Mode  \\\n",
       "0      Claire Gute 2016-11-11    Second Class   \n",
       "2  Darrin Van Huff 2016-06-16    Second Class   \n",
       "3   Sean O'Donnell 2015-10-18  Standard Class   \n",
       "4   Sean O'Donnell 2015-10-18  Standard Class   \n",
       "5  Brosina Hoffman        NaT             NaN   \n",
       "\n",
       "                                        Product Name  Quantity  \n",
       "0                                                NaN       NaN  \n",
       "2  Self-Adhesive Address Labels for Typewriters b...       2.0  \n",
       "3      Bretford CR4500 Series Slim Rectangular Table       5.0  \n",
       "4                     Eldon Fold 'N Roll Cart System       2.0  \n",
       "5  Eldon Expressions Wood and Plastic Desk Access...       7.0  "
      ]
     },
     "execution_count": 60,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pd.merge(df_1,df_3,on='Customer Name',how='outer').drop_duplicates()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Exercise 14: Join method"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 61,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Ship Date</th>\n",
       "      <th>Ship Mode</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Customer Name</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>Claire Gute</th>\n",
       "      <td>2016-11-11</td>\n",
       "      <td>Second Class</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Claire Gute</th>\n",
       "      <td>2016-11-11</td>\n",
       "      <td>Second Class</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Darrin Van Huff</th>\n",
       "      <td>2016-06-16</td>\n",
       "      <td>Second Class</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Sean O'Donnell</th>\n",
       "      <td>2015-10-18</td>\n",
       "      <td>Standard Class</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                 Ship Date       Ship Mode\n",
       "Customer Name                             \n",
       "Claire Gute     2016-11-11    Second Class\n",
       "Claire Gute     2016-11-11    Second Class\n",
       "Darrin Van Huff 2016-06-16    Second Class\n",
       "Sean O'Donnell  2015-10-18  Standard Class"
      ]
     },
     "execution_count": 61,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_1=df[['Customer Name','Ship Date','Ship Mode']][0:4]\n",
    "df_1.set_index(['Customer Name'],inplace=True)\n",
    "df_1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 62,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Product Name</th>\n",
       "      <th>Quantity</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Customer Name</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>Darrin Van Huff</th>\n",
       "      <td>Self-Adhesive Address Labels for Typewriters b...</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Sean O'Donnell</th>\n",
       "      <td>Bretford CR4500 Series Slim Rectangular Table</td>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Sean O'Donnell</th>\n",
       "      <td>Eldon Fold 'N Roll Cart System</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Brosina Hoffman</th>\n",
       "      <td>Eldon Expressions Wood and Plastic Desk Access...</td>\n",
       "      <td>7</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                                      Product Name  Quantity\n",
       "Customer Name                                                               \n",
       "Darrin Van Huff  Self-Adhesive Address Labels for Typewriters b...         2\n",
       "Sean O'Donnell       Bretford CR4500 Series Slim Rectangular Table         5\n",
       "Sean O'Donnell                      Eldon Fold 'N Roll Cart System         2\n",
       "Brosina Hoffman  Eldon Expressions Wood and Plastic Desk Access...         7"
      ]
     },
     "execution_count": 62,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_2=df[['Customer Name','Product Name','Quantity']][2:6]\n",
    "df_2.set_index(['Customer Name'],inplace=True)\n",
    "df_2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 63,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Ship Date</th>\n",
       "      <th>Ship Mode</th>\n",
       "      <th>Product Name</th>\n",
       "      <th>Quantity</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Customer Name</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>Claire Gute</th>\n",
       "      <td>2016-11-11</td>\n",
       "      <td>Second Class</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Darrin Van Huff</th>\n",
       "      <td>2016-06-16</td>\n",
       "      <td>Second Class</td>\n",
       "      <td>Self-Adhesive Address Labels for Typewriters b...</td>\n",
       "      <td>2.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Sean O'Donnell</th>\n",
       "      <td>2015-10-18</td>\n",
       "      <td>Standard Class</td>\n",
       "      <td>Bretford CR4500 Series Slim Rectangular Table</td>\n",
       "      <td>5.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Sean O'Donnell</th>\n",
       "      <td>2015-10-18</td>\n",
       "      <td>Standard Class</td>\n",
       "      <td>Eldon Fold 'N Roll Cart System</td>\n",
       "      <td>2.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                 Ship Date       Ship Mode  \\\n",
       "Customer Name                                \n",
       "Claire Gute     2016-11-11    Second Class   \n",
       "Darrin Van Huff 2016-06-16    Second Class   \n",
       "Sean O'Donnell  2015-10-18  Standard Class   \n",
       "Sean O'Donnell  2015-10-18  Standard Class   \n",
       "\n",
       "                                                      Product Name  Quantity  \n",
       "Customer Name                                                                 \n",
       "Claire Gute                                                    NaN       NaN  \n",
       "Darrin Van Huff  Self-Adhesive Address Labels for Typewriters b...       2.0  \n",
       "Sean O'Donnell       Bretford CR4500 Series Slim Rectangular Table       5.0  \n",
       "Sean O'Donnell                      Eldon Fold 'N Roll Cart System       2.0  "
      ]
     },
     "execution_count": 63,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_1.join(df_2,how='left').drop_duplicates()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 64,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Ship Date</th>\n",
       "      <th>Ship Mode</th>\n",
       "      <th>Product Name</th>\n",
       "      <th>Quantity</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Customer Name</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>Brosina Hoffman</th>\n",
       "      <td>NaT</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Eldon Expressions Wood and Plastic Desk Access...</td>\n",
       "      <td>7</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Darrin Van Huff</th>\n",
       "      <td>2016-06-16</td>\n",
       "      <td>Second Class</td>\n",
       "      <td>Self-Adhesive Address Labels for Typewriters b...</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Sean O'Donnell</th>\n",
       "      <td>2015-10-18</td>\n",
       "      <td>Standard Class</td>\n",
       "      <td>Bretford CR4500 Series Slim Rectangular Table</td>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Sean O'Donnell</th>\n",
       "      <td>2015-10-18</td>\n",
       "      <td>Standard Class</td>\n",
       "      <td>Eldon Fold 'N Roll Cart System</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                 Ship Date       Ship Mode  \\\n",
       "Customer Name                                \n",
       "Brosina Hoffman        NaT             NaN   \n",
       "Darrin Van Huff 2016-06-16    Second Class   \n",
       "Sean O'Donnell  2015-10-18  Standard Class   \n",
       "Sean O'Donnell  2015-10-18  Standard Class   \n",
       "\n",
       "                                                      Product Name  Quantity  \n",
       "Customer Name                                                                 \n",
       "Brosina Hoffman  Eldon Expressions Wood and Plastic Desk Access...         7  \n",
       "Darrin Van Huff  Self-Adhesive Address Labels for Typewriters b...         2  \n",
       "Sean O'Donnell       Bretford CR4500 Series Slim Rectangular Table         5  \n",
       "Sean O'Donnell                      Eldon Fold 'N Roll Cart System         2  "
      ]
     },
     "execution_count": 64,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_1.join(df_2,how='right').drop_duplicates()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 65,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Ship Date</th>\n",
       "      <th>Ship Mode</th>\n",
       "      <th>Product Name</th>\n",
       "      <th>Quantity</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Customer Name</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>Darrin Van Huff</th>\n",
       "      <td>2016-06-16</td>\n",
       "      <td>Second Class</td>\n",
       "      <td>Self-Adhesive Address Labels for Typewriters b...</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Sean O'Donnell</th>\n",
       "      <td>2015-10-18</td>\n",
       "      <td>Standard Class</td>\n",
       "      <td>Bretford CR4500 Series Slim Rectangular Table</td>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Sean O'Donnell</th>\n",
       "      <td>2015-10-18</td>\n",
       "      <td>Standard Class</td>\n",
       "      <td>Eldon Fold 'N Roll Cart System</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                 Ship Date       Ship Mode  \\\n",
       "Customer Name                                \n",
       "Darrin Van Huff 2016-06-16    Second Class   \n",
       "Sean O'Donnell  2015-10-18  Standard Class   \n",
       "Sean O'Donnell  2015-10-18  Standard Class   \n",
       "\n",
       "                                                      Product Name  Quantity  \n",
       "Customer Name                                                                 \n",
       "Darrin Van Huff  Self-Adhesive Address Labels for Typewriters b...         2  \n",
       "Sean O'Donnell       Bretford CR4500 Series Slim Rectangular Table         5  \n",
       "Sean O'Donnell                      Eldon Fold 'N Roll Cart System         2  "
      ]
     },
     "execution_count": 65,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_1.join(df_2,how='inner').drop_duplicates()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 66,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Ship Date</th>\n",
       "      <th>Ship Mode</th>\n",
       "      <th>Product Name</th>\n",
       "      <th>Quantity</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Customer Name</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>Brosina Hoffman</th>\n",
       "      <td>NaT</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Eldon Expressions Wood and Plastic Desk Access...</td>\n",
       "      <td>7.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Claire Gute</th>\n",
       "      <td>2016-11-11</td>\n",
       "      <td>Second Class</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Darrin Van Huff</th>\n",
       "      <td>2016-06-16</td>\n",
       "      <td>Second Class</td>\n",
       "      <td>Self-Adhesive Address Labels for Typewriters b...</td>\n",
       "      <td>2.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Sean O'Donnell</th>\n",
       "      <td>2015-10-18</td>\n",
       "      <td>Standard Class</td>\n",
       "      <td>Bretford CR4500 Series Slim Rectangular Table</td>\n",
       "      <td>5.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Sean O'Donnell</th>\n",
       "      <td>2015-10-18</td>\n",
       "      <td>Standard Class</td>\n",
       "      <td>Eldon Fold 'N Roll Cart System</td>\n",
       "      <td>2.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                 Ship Date       Ship Mode  \\\n",
       "Customer Name                                \n",
       "Brosina Hoffman        NaT             NaN   \n",
       "Claire Gute     2016-11-11    Second Class   \n",
       "Darrin Van Huff 2016-06-16    Second Class   \n",
       "Sean O'Donnell  2015-10-18  Standard Class   \n",
       "Sean O'Donnell  2015-10-18  Standard Class   \n",
       "\n",
       "                                                      Product Name  Quantity  \n",
       "Customer Name                                                                 \n",
       "Brosina Hoffman  Eldon Expressions Wood and Plastic Desk Access...       7.0  \n",
       "Claire Gute                                                    NaN       NaN  \n",
       "Darrin Van Huff  Self-Adhesive Address Labels for Typewriters b...       2.0  \n",
       "Sean O'Donnell       Bretford CR4500 Series Slim Rectangular Table       5.0  \n",
       "Sean O'Donnell                      Eldon Fold 'N Roll Cart System       2.0  "
      ]
     },
     "execution_count": 66,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_1.join(df_2,how='outer').drop_duplicates()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Miscellaneous useful methods"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Exercise 15: Randomized sampling - `sample` method"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 67,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Order ID</th>\n",
       "      <th>Order Date</th>\n",
       "      <th>Ship Date</th>\n",
       "      <th>Ship Mode</th>\n",
       "      <th>Customer ID</th>\n",
       "      <th>Customer Name</th>\n",
       "      <th>Segment</th>\n",
       "      <th>City</th>\n",
       "      <th>State</th>\n",
       "      <th>Postal Code</th>\n",
       "      <th>Region</th>\n",
       "      <th>Product ID</th>\n",
       "      <th>Category</th>\n",
       "      <th>Sub-Category</th>\n",
       "      <th>Product Name</th>\n",
       "      <th>Sales</th>\n",
       "      <th>Quantity</th>\n",
       "      <th>Discount</th>\n",
       "      <th>Profit</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>6961</th>\n",
       "      <td>CA-2017-162096</td>\n",
       "      <td>2017-11-10</td>\n",
       "      <td>2017-11-10</td>\n",
       "      <td>Same Day</td>\n",
       "      <td>TB-21190</td>\n",
       "      <td>Thomas Brumley</td>\n",
       "      <td>Home Office</td>\n",
       "      <td>Riverside</td>\n",
       "      <td>California</td>\n",
       "      <td>92503</td>\n",
       "      <td>West</td>\n",
       "      <td>OFF-AR-10002221</td>\n",
       "      <td>Office Supplies</td>\n",
       "      <td>Art</td>\n",
       "      <td>12 Colored Short Pencils</td>\n",
       "      <td>7.800</td>\n",
       "      <td>3</td>\n",
       "      <td>0.0</td>\n",
       "      <td>2.1060</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1000</th>\n",
       "      <td>CA-2016-155488</td>\n",
       "      <td>2016-11-13</td>\n",
       "      <td>2016-11-17</td>\n",
       "      <td>Standard Class</td>\n",
       "      <td>FM-14290</td>\n",
       "      <td>Frank Merwin</td>\n",
       "      <td>Home Office</td>\n",
       "      <td>Vancouver</td>\n",
       "      <td>Washington</td>\n",
       "      <td>98661</td>\n",
       "      <td>West</td>\n",
       "      <td>OFF-AR-10002956</td>\n",
       "      <td>Office Supplies</td>\n",
       "      <td>Art</td>\n",
       "      <td>Boston 16801 Nautilus Battery Pencil Sharpener</td>\n",
       "      <td>44.020</td>\n",
       "      <td>2</td>\n",
       "      <td>0.0</td>\n",
       "      <td>11.4452</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3593</th>\n",
       "      <td>CA-2016-164154</td>\n",
       "      <td>2016-09-11</td>\n",
       "      <td>2016-09-17</td>\n",
       "      <td>Standard Class</td>\n",
       "      <td>NZ-18565</td>\n",
       "      <td>Nick Zandusky</td>\n",
       "      <td>Home Office</td>\n",
       "      <td>Toledo</td>\n",
       "      <td>Ohio</td>\n",
       "      <td>43615</td>\n",
       "      <td>East</td>\n",
       "      <td>OFF-BI-10001658</td>\n",
       "      <td>Office Supplies</td>\n",
       "      <td>Binders</td>\n",
       "      <td>GBC Standard Therm-A-Bind Covers</td>\n",
       "      <td>22.428</td>\n",
       "      <td>3</td>\n",
       "      <td>0.7</td>\n",
       "      <td>-17.9424</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>276</th>\n",
       "      <td>CA-2017-132976</td>\n",
       "      <td>2017-10-13</td>\n",
       "      <td>2017-10-17</td>\n",
       "      <td>Standard Class</td>\n",
       "      <td>AG-10495</td>\n",
       "      <td>Andrew Gjertsen</td>\n",
       "      <td>Corporate</td>\n",
       "      <td>Philadelphia</td>\n",
       "      <td>Pennsylvania</td>\n",
       "      <td>19140</td>\n",
       "      <td>East</td>\n",
       "      <td>OFF-PA-10000673</td>\n",
       "      <td>Office Supplies</td>\n",
       "      <td>Paper</td>\n",
       "      <td>Post-it “Important Message” Note Pad, Neon Col...</td>\n",
       "      <td>11.648</td>\n",
       "      <td>2</td>\n",
       "      <td>0.2</td>\n",
       "      <td>4.0768</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8104</th>\n",
       "      <td>CA-2015-149846</td>\n",
       "      <td>2015-05-22</td>\n",
       "      <td>2015-05-26</td>\n",
       "      <td>Standard Class</td>\n",
       "      <td>SB-20185</td>\n",
       "      <td>Sarah Brown</td>\n",
       "      <td>Consumer</td>\n",
       "      <td>Los Angeles</td>\n",
       "      <td>California</td>\n",
       "      <td>90045</td>\n",
       "      <td>West</td>\n",
       "      <td>TEC-PH-10003645</td>\n",
       "      <td>Technology</td>\n",
       "      <td>Phones</td>\n",
       "      <td>Aastra 57i VoIP phone</td>\n",
       "      <td>775.728</td>\n",
       "      <td>6</td>\n",
       "      <td>0.2</td>\n",
       "      <td>58.1796</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "            Order ID Order Date  Ship Date       Ship Mode Customer ID  \\\n",
       "6961  CA-2017-162096 2017-11-10 2017-11-10        Same Day    TB-21190   \n",
       "1000  CA-2016-155488 2016-11-13 2016-11-17  Standard Class    FM-14290   \n",
       "3593  CA-2016-164154 2016-09-11 2016-09-17  Standard Class    NZ-18565   \n",
       "276   CA-2017-132976 2017-10-13 2017-10-17  Standard Class    AG-10495   \n",
       "8104  CA-2015-149846 2015-05-22 2015-05-26  Standard Class    SB-20185   \n",
       "\n",
       "        Customer Name      Segment          City         State  Postal Code  \\\n",
       "6961   Thomas Brumley  Home Office     Riverside    California        92503   \n",
       "1000     Frank Merwin  Home Office     Vancouver    Washington        98661   \n",
       "3593    Nick Zandusky  Home Office        Toledo          Ohio        43615   \n",
       "276   Andrew Gjertsen    Corporate  Philadelphia  Pennsylvania        19140   \n",
       "8104      Sarah Brown     Consumer   Los Angeles    California        90045   \n",
       "\n",
       "     Region       Product ID         Category Sub-Category  \\\n",
       "6961   West  OFF-AR-10002221  Office Supplies          Art   \n",
       "1000   West  OFF-AR-10002956  Office Supplies          Art   \n",
       "3593   East  OFF-BI-10001658  Office Supplies      Binders   \n",
       "276    East  OFF-PA-10000673  Office Supplies        Paper   \n",
       "8104   West  TEC-PH-10003645       Technology       Phones   \n",
       "\n",
       "                                           Product Name    Sales  Quantity  \\\n",
       "6961                           12 Colored Short Pencils    7.800         3   \n",
       "1000     Boston 16801 Nautilus Battery Pencil Sharpener   44.020         2   \n",
       "3593                   GBC Standard Therm-A-Bind Covers   22.428         3   \n",
       "276   Post-it “Important Message” Note Pad, Neon Col...   11.648         2   \n",
       "8104                              Aastra 57i VoIP phone  775.728         6   \n",
       "\n",
       "      Discount   Profit  \n",
       "6961       0.0   2.1060  \n",
       "1000       0.0  11.4452  \n",
       "3593       0.7 -17.9424  \n",
       "276        0.2   4.0768  \n",
       "8104       0.2  58.1796  "
      ]
     },
     "execution_count": 67,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Draw 5 rows at random. No random_state is passed, so each run returns a different sample.\n",
    "df.sample(n=5)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 68,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Order ID</th>\n",
       "      <th>Order Date</th>\n",
       "      <th>Ship Date</th>\n",
       "      <th>Ship Mode</th>\n",
       "      <th>Customer ID</th>\n",
       "      <th>Customer Name</th>\n",
       "      <th>Segment</th>\n",
       "      <th>City</th>\n",
       "      <th>State</th>\n",
       "      <th>Postal Code</th>\n",
       "      <th>Region</th>\n",
       "      <th>Product ID</th>\n",
       "      <th>Category</th>\n",
       "      <th>Sub-Category</th>\n",
       "      <th>Product Name</th>\n",
       "      <th>Sales</th>\n",
       "      <th>Quantity</th>\n",
       "      <th>Discount</th>\n",
       "      <th>Profit</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>2803</th>\n",
       "      <td>CA-2015-159380</td>\n",
       "      <td>2015-05-12</td>\n",
       "      <td>2015-05-16</td>\n",
       "      <td>Standard Class</td>\n",
       "      <td>CS-12505</td>\n",
       "      <td>Cindy Stewart</td>\n",
       "      <td>Consumer</td>\n",
       "      <td>San Francisco</td>\n",
       "      <td>California</td>\n",
       "      <td>94122</td>\n",
       "      <td>West</td>\n",
       "      <td>OFF-PA-10003893</td>\n",
       "      <td>Office Supplies</td>\n",
       "      <td>Paper</td>\n",
       "      <td>Xerox 1962</td>\n",
       "      <td>12.840</td>\n",
       "      <td>3</td>\n",
       "      <td>0.0</td>\n",
       "      <td>5.7780</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8480</th>\n",
       "      <td>CA-2014-109890</td>\n",
       "      <td>2014-07-21</td>\n",
       "      <td>2014-07-27</td>\n",
       "      <td>Standard Class</td>\n",
       "      <td>PG-18820</td>\n",
       "      <td>Patrick Gardner</td>\n",
       "      <td>Consumer</td>\n",
       "      <td>Omaha</td>\n",
       "      <td>Nebraska</td>\n",
       "      <td>68104</td>\n",
       "      <td>Central</td>\n",
       "      <td>TEC-PH-10004100</td>\n",
       "      <td>Technology</td>\n",
       "      <td>Phones</td>\n",
       "      <td>Griffin GC17055 Auxiliary Audio Cable</td>\n",
       "      <td>35.980</td>\n",
       "      <td>2</td>\n",
       "      <td>0.0</td>\n",
       "      <td>10.0744</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7817</th>\n",
       "      <td>CA-2016-138478</td>\n",
       "      <td>2016-10-21</td>\n",
       "      <td>2016-10-26</td>\n",
       "      <td>Second Class</td>\n",
       "      <td>DP-13390</td>\n",
       "      <td>Dennis Pardue</td>\n",
       "      <td>Home Office</td>\n",
       "      <td>North Las Vegas</td>\n",
       "      <td>Nevada</td>\n",
       "      <td>89031</td>\n",
       "      <td>West</td>\n",
       "      <td>OFF-PA-10001801</td>\n",
       "      <td>Office Supplies</td>\n",
       "      <td>Paper</td>\n",
       "      <td>Xerox 193</td>\n",
       "      <td>35.880</td>\n",
       "      <td>6</td>\n",
       "      <td>0.0</td>\n",
       "      <td>17.5812</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6450</th>\n",
       "      <td>CA-2015-156510</td>\n",
       "      <td>2015-09-25</td>\n",
       "      <td>2015-09-29</td>\n",
       "      <td>Standard Class</td>\n",
       "      <td>EH-13990</td>\n",
       "      <td>Erica Hackney</td>\n",
       "      <td>Consumer</td>\n",
       "      <td>Meriden</td>\n",
       "      <td>Connecticut</td>\n",
       "      <td>6450</td>\n",
       "      <td>East</td>\n",
       "      <td>OFF-PA-10002222</td>\n",
       "      <td>Office Supplies</td>\n",
       "      <td>Paper</td>\n",
       "      <td>Xerox Color Copier Paper, 11\" x 17\", Ream</td>\n",
       "      <td>45.680</td>\n",
       "      <td>2</td>\n",
       "      <td>0.0</td>\n",
       "      <td>21.0128</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>851</th>\n",
       "      <td>CA-2016-152534</td>\n",
       "      <td>2016-06-20</td>\n",
       "      <td>2016-06-25</td>\n",
       "      <td>Second Class</td>\n",
       "      <td>DP-13105</td>\n",
       "      <td>Dave Poirier</td>\n",
       "      <td>Corporate</td>\n",
       "      <td>Salinas</td>\n",
       "      <td>California</td>\n",
       "      <td>93905</td>\n",
       "      <td>West</td>\n",
       "      <td>OFF-PA-10001870</td>\n",
       "      <td>Office Supplies</td>\n",
       "      <td>Paper</td>\n",
       "      <td>Xerox 202</td>\n",
       "      <td>38.880</td>\n",
       "      <td>6</td>\n",
       "      <td>0.0</td>\n",
       "      <td>18.6624</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5115</th>\n",
       "      <td>CA-2017-125640</td>\n",
       "      <td>2017-07-25</td>\n",
       "      <td>2017-07-29</td>\n",
       "      <td>Standard Class</td>\n",
       "      <td>DD-13570</td>\n",
       "      <td>Dorothy Dickinson</td>\n",
       "      <td>Consumer</td>\n",
       "      <td>Philadelphia</td>\n",
       "      <td>Pennsylvania</td>\n",
       "      <td>19134</td>\n",
       "      <td>East</td>\n",
       "      <td>OFF-LA-10004178</td>\n",
       "      <td>Office Supplies</td>\n",
       "      <td>Labels</td>\n",
       "      <td>Avery 491</td>\n",
       "      <td>3.304</td>\n",
       "      <td>1</td>\n",
       "      <td>0.2</td>\n",
       "      <td>1.0738</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2727</th>\n",
       "      <td>CA-2017-119193</td>\n",
       "      <td>2017-12-22</td>\n",
       "      <td>2017-12-24</td>\n",
       "      <td>First Class</td>\n",
       "      <td>SK-19990</td>\n",
       "      <td>Sally Knutson</td>\n",
       "      <td>Consumer</td>\n",
       "      <td>Toledo</td>\n",
       "      <td>Ohio</td>\n",
       "      <td>43615</td>\n",
       "      <td>East</td>\n",
       "      <td>OFF-BI-10000848</td>\n",
       "      <td>Office Supplies</td>\n",
       "      <td>Binders</td>\n",
       "      <td>Angle-D Ring Binders</td>\n",
       "      <td>1.641</td>\n",
       "      <td>1</td>\n",
       "      <td>0.7</td>\n",
       "      <td>-1.3128</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5882</th>\n",
       "      <td>CA-2016-133935</td>\n",
       "      <td>2016-09-18</td>\n",
       "      <td>2016-09-22</td>\n",
       "      <td>Standard Class</td>\n",
       "      <td>JW-15220</td>\n",
       "      <td>Jane Waco</td>\n",
       "      <td>Corporate</td>\n",
       "      <td>San Diego</td>\n",
       "      <td>California</td>\n",
       "      <td>92105</td>\n",
       "      <td>West</td>\n",
       "      <td>FUR-CH-10002880</td>\n",
       "      <td>Furniture</td>\n",
       "      <td>Chairs</td>\n",
       "      <td>Global High-Back Leather Tilter, Burgundy</td>\n",
       "      <td>885.528</td>\n",
       "      <td>9</td>\n",
       "      <td>0.2</td>\n",
       "      <td>-99.6219</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9737</th>\n",
       "      <td>CA-2017-129294</td>\n",
       "      <td>2017-03-16</td>\n",
       "      <td>2017-03-21</td>\n",
       "      <td>Standard Class</td>\n",
       "      <td>KD-16615</td>\n",
       "      <td>Ken Dana</td>\n",
       "      <td>Corporate</td>\n",
       "      <td>Los Angeles</td>\n",
       "      <td>California</td>\n",
       "      <td>90032</td>\n",
       "      <td>West</td>\n",
       "      <td>OFF-ST-10002615</td>\n",
       "      <td>Office Supplies</td>\n",
       "      <td>Storage</td>\n",
       "      <td>Dual Level, Single-Width Filing Carts</td>\n",
       "      <td>310.120</td>\n",
       "      <td>2</td>\n",
       "      <td>0.0</td>\n",
       "      <td>80.6312</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7699</th>\n",
       "      <td>CA-2017-151799</td>\n",
       "      <td>2017-12-14</td>\n",
       "      <td>2017-12-18</td>\n",
       "      <td>Standard Class</td>\n",
       "      <td>BF-11170</td>\n",
       "      <td>Ben Ferrer</td>\n",
       "      <td>Home Office</td>\n",
       "      <td>Lawrence</td>\n",
       "      <td>Massachusetts</td>\n",
       "      <td>1841</td>\n",
       "      <td>East</td>\n",
       "      <td>OFF-SU-10001664</td>\n",
       "      <td>Office Supplies</td>\n",
       "      <td>Supplies</td>\n",
       "      <td>Acme Office Executive Series Stainless Steel T...</td>\n",
       "      <td>25.710</td>\n",
       "      <td>3</td>\n",
       "      <td>0.0</td>\n",
       "      <td>6.6846</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "            Order ID Order Date  Ship Date       Ship Mode Customer ID  \\\n",
       "2803  CA-2015-159380 2015-05-12 2015-05-16  Standard Class    CS-12505   \n",
       "8480  CA-2014-109890 2014-07-21 2014-07-27  Standard Class    PG-18820   \n",
       "7817  CA-2016-138478 2016-10-21 2016-10-26    Second Class    DP-13390   \n",
       "6450  CA-2015-156510 2015-09-25 2015-09-29  Standard Class    EH-13990   \n",
       "851   CA-2016-152534 2016-06-20 2016-06-25    Second Class    DP-13105   \n",
       "5115  CA-2017-125640 2017-07-25 2017-07-29  Standard Class    DD-13570   \n",
       "2727  CA-2017-119193 2017-12-22 2017-12-24     First Class    SK-19990   \n",
       "5882  CA-2016-133935 2016-09-18 2016-09-22  Standard Class    JW-15220   \n",
       "9737  CA-2017-129294 2017-03-16 2017-03-21  Standard Class    KD-16615   \n",
       "7699  CA-2017-151799 2017-12-14 2017-12-18  Standard Class    BF-11170   \n",
       "\n",
       "          Customer Name      Segment             City          State  \\\n",
       "2803      Cindy Stewart     Consumer    San Francisco     California   \n",
       "8480    Patrick Gardner     Consumer            Omaha       Nebraska   \n",
       "7817      Dennis Pardue  Home Office  North Las Vegas         Nevada   \n",
       "6450      Erica Hackney     Consumer          Meriden    Connecticut   \n",
       "851        Dave Poirier    Corporate          Salinas     California   \n",
       "5115  Dorothy Dickinson     Consumer     Philadelphia   Pennsylvania   \n",
       "2727      Sally Knutson     Consumer           Toledo           Ohio   \n",
       "5882          Jane Waco    Corporate        San Diego     California   \n",
       "9737           Ken Dana    Corporate      Los Angeles     California   \n",
       "7699         Ben Ferrer  Home Office         Lawrence  Massachusetts   \n",
       "\n",
       "      Postal Code   Region       Product ID         Category Sub-Category  \\\n",
       "2803        94122     West  OFF-PA-10003893  Office Supplies        Paper   \n",
       "8480        68104  Central  TEC-PH-10004100       Technology       Phones   \n",
       "7817        89031     West  OFF-PA-10001801  Office Supplies        Paper   \n",
       "6450         6450     East  OFF-PA-10002222  Office Supplies        Paper   \n",
       "851         93905     West  OFF-PA-10001870  Office Supplies        Paper   \n",
       "5115        19134     East  OFF-LA-10004178  Office Supplies       Labels   \n",
       "2727        43615     East  OFF-BI-10000848  Office Supplies      Binders   \n",
       "5882        92105     West  FUR-CH-10002880        Furniture       Chairs   \n",
       "9737        90032     West  OFF-ST-10002615  Office Supplies      Storage   \n",
       "7699         1841     East  OFF-SU-10001664  Office Supplies     Supplies   \n",
       "\n",
       "                                           Product Name    Sales  Quantity  \\\n",
       "2803                                         Xerox 1962   12.840         3   \n",
       "8480              Griffin GC17055 Auxiliary Audio Cable   35.980         2   \n",
       "7817                                          Xerox 193   35.880         6   \n",
       "6450          Xerox Color Copier Paper, 11\" x 17\", Ream   45.680         2   \n",
       "851                                           Xerox 202   38.880         6   \n",
       "5115                                          Avery 491    3.304         1   \n",
       "2727                               Angle-D Ring Binders    1.641         1   \n",
       "5882          Global High-Back Leather Tilter, Burgundy  885.528         9   \n",
       "9737              Dual Level, Single-Width Filing Carts  310.120         2   \n",
       "7699  Acme Office Executive Series Stainless Steel T...   25.710         3   \n",
       "\n",
       "      Discount   Profit  \n",
       "2803       0.0   5.7780  \n",
       "8480       0.0  10.0744  \n",
       "7817       0.0  17.5812  \n",
       "6450       0.0  21.0128  \n",
       "851        0.0  18.6624  \n",
       "5115       0.2   1.0738  \n",
       "2727       0.7  -1.3128  \n",
       "5882       0.2 -99.6219  \n",
       "9737       0.0  80.6312  \n",
       "7699       0.0   6.6846  "
      ]
     },
     "execution_count": 68,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# frac=0.001 samples a fraction (0.1%) of the rows instead of a fixed row count.\n",
    "df.sample(frac=0.001)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 69,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Order ID</th>\n",
       "      <th>Order Date</th>\n",
       "      <th>Ship Date</th>\n",
       "      <th>Ship Mode</th>\n",
       "      <th>Customer ID</th>\n",
       "      <th>Customer Name</th>\n",
       "      <th>Segment</th>\n",
       "      <th>City</th>\n",
       "      <th>State</th>\n",
       "      <th>Postal Code</th>\n",
       "      <th>Region</th>\n",
       "      <th>Product ID</th>\n",
       "      <th>Category</th>\n",
       "      <th>Sub-Category</th>\n",
       "      <th>Product Name</th>\n",
       "      <th>Sales</th>\n",
       "      <th>Quantity</th>\n",
       "      <th>Discount</th>\n",
       "      <th>Profit</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>5325</th>\n",
       "      <td>CA-2015-119508</td>\n",
       "      <td>2015-12-04</td>\n",
       "      <td>2015-12-09</td>\n",
       "      <td>Standard Class</td>\n",
       "      <td>TZ-21580</td>\n",
       "      <td>Tracy Zic</td>\n",
       "      <td>Consumer</td>\n",
       "      <td>Lakewood</td>\n",
       "      <td>California</td>\n",
       "      <td>90712</td>\n",
       "      <td>West</td>\n",
       "      <td>FUR-FU-10004270</td>\n",
       "      <td>Furniture</td>\n",
       "      <td>Furnishings</td>\n",
       "      <td>Eldon Image Series Desk Accessories, Burgundy</td>\n",
       "      <td>25.080</td>\n",
       "      <td>6</td>\n",
       "      <td>0.0</td>\n",
       "      <td>9.0288</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8671</th>\n",
       "      <td>US-2014-159611</td>\n",
       "      <td>2014-12-27</td>\n",
       "      <td>2015-01-01</td>\n",
       "      <td>Second Class</td>\n",
       "      <td>KB-16315</td>\n",
       "      <td>Karl Braun</td>\n",
       "      <td>Consumer</td>\n",
       "      <td>Cleveland</td>\n",
       "      <td>Ohio</td>\n",
       "      <td>44105</td>\n",
       "      <td>East</td>\n",
       "      <td>OFF-ST-10002790</td>\n",
       "      <td>Office Supplies</td>\n",
       "      <td>Storage</td>\n",
       "      <td>Safco Industrial Shelving</td>\n",
       "      <td>118.160</td>\n",
       "      <td>2</td>\n",
       "      <td>0.2</td>\n",
       "      <td>-25.1090</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8945</th>\n",
       "      <td>CA-2014-128622</td>\n",
       "      <td>2014-11-15</td>\n",
       "      <td>2014-11-17</td>\n",
       "      <td>Second Class</td>\n",
       "      <td>SC-20260</td>\n",
       "      <td>Scott Cohen</td>\n",
       "      <td>Corporate</td>\n",
       "      <td>San Francisco</td>\n",
       "      <td>California</td>\n",
       "      <td>94110</td>\n",
       "      <td>West</td>\n",
       "      <td>OFF-SU-10001574</td>\n",
       "      <td>Office Supplies</td>\n",
       "      <td>Supplies</td>\n",
       "      <td>Acme Value Line Scissors</td>\n",
       "      <td>10.950</td>\n",
       "      <td>3</td>\n",
       "      <td>0.0</td>\n",
       "      <td>3.2850</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>632</th>\n",
       "      <td>US-2016-135720</td>\n",
       "      <td>2016-12-11</td>\n",
       "      <td>2016-12-13</td>\n",
       "      <td>Second Class</td>\n",
       "      <td>FM-14380</td>\n",
       "      <td>Fred McMath</td>\n",
       "      <td>Consumer</td>\n",
       "      <td>Aurora</td>\n",
       "      <td>Colorado</td>\n",
       "      <td>80013</td>\n",
       "      <td>West</td>\n",
       "      <td>TEC-PH-10002103</td>\n",
       "      <td>Technology</td>\n",
       "      <td>Phones</td>\n",
       "      <td>Jabra SPEAK 410</td>\n",
       "      <td>300.768</td>\n",
       "      <td>4</td>\n",
       "      <td>0.2</td>\n",
       "      <td>30.0768</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4038</th>\n",
       "      <td>CA-2014-110786</td>\n",
       "      <td>2014-12-29</td>\n",
       "      <td>2015-01-02</td>\n",
       "      <td>Standard Class</td>\n",
       "      <td>AJ-10795</td>\n",
       "      <td>Anthony Johnson</td>\n",
       "      <td>Corporate</td>\n",
       "      <td>San Francisco</td>\n",
       "      <td>California</td>\n",
       "      <td>94110</td>\n",
       "      <td>West</td>\n",
       "      <td>OFF-PA-10000528</td>\n",
       "      <td>Office Supplies</td>\n",
       "      <td>Paper</td>\n",
       "      <td>Xerox 1981</td>\n",
       "      <td>21.120</td>\n",
       "      <td>4</td>\n",
       "      <td>0.0</td>\n",
       "      <td>9.5040</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9778</th>\n",
       "      <td>CA-2014-169019</td>\n",
       "      <td>2014-07-26</td>\n",
       "      <td>2014-07-30</td>\n",
       "      <td>Standard Class</td>\n",
       "      <td>LF-17185</td>\n",
       "      <td>Luke Foster</td>\n",
       "      <td>Consumer</td>\n",
       "      <td>San Antonio</td>\n",
       "      <td>Texas</td>\n",
       "      <td>78207</td>\n",
       "      <td>Central</td>\n",
       "      <td>OFF-BI-10001679</td>\n",
       "      <td>Office Supplies</td>\n",
       "      <td>Binders</td>\n",
       "      <td>GBC Instant Index System for Binding Systems</td>\n",
       "      <td>8.880</td>\n",
       "      <td>5</td>\n",
       "      <td>0.8</td>\n",
       "      <td>-13.3200</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2433</th>\n",
       "      <td>US-2017-112613</td>\n",
       "      <td>2017-05-28</td>\n",
       "      <td>2017-06-01</td>\n",
       "      <td>Standard Class</td>\n",
       "      <td>JH-15910</td>\n",
       "      <td>Jonathan Howell</td>\n",
       "      <td>Consumer</td>\n",
       "      <td>Houston</td>\n",
       "      <td>Texas</td>\n",
       "      <td>77070</td>\n",
       "      <td>Central</td>\n",
       "      <td>TEC-PH-10001536</td>\n",
       "      <td>Technology</td>\n",
       "      <td>Phones</td>\n",
       "      <td>Spigen Samsung Galaxy S5 Case Wallet</td>\n",
       "      <td>54.368</td>\n",
       "      <td>4</td>\n",
       "      <td>0.2</td>\n",
       "      <td>4.0776</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5907</th>\n",
       "      <td>US-2016-113985</td>\n",
       "      <td>2016-12-02</td>\n",
       "      <td>2016-12-07</td>\n",
       "      <td>Standard Class</td>\n",
       "      <td>KD-16495</td>\n",
       "      <td>Keith Dawkins</td>\n",
       "      <td>Corporate</td>\n",
       "      <td>San Jose</td>\n",
       "      <td>California</td>\n",
       "      <td>95123</td>\n",
       "      <td>West</td>\n",
       "      <td>OFF-BI-10002353</td>\n",
       "      <td>Office Supplies</td>\n",
       "      <td>Binders</td>\n",
       "      <td>GBC VeloBind Cover Sets</td>\n",
       "      <td>24.704</td>\n",
       "      <td>2</td>\n",
       "      <td>0.2</td>\n",
       "      <td>9.2640</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>483</th>\n",
       "      <td>CA-2014-127691</td>\n",
       "      <td>2014-07-22</td>\n",
       "      <td>2014-07-27</td>\n",
       "      <td>Standard Class</td>\n",
       "      <td>EM-14065</td>\n",
       "      <td>Erin Mull</td>\n",
       "      <td>Consumer</td>\n",
       "      <td>New York City</td>\n",
       "      <td>New York</td>\n",
       "      <td>10024</td>\n",
       "      <td>East</td>\n",
       "      <td>TEC-AC-10002567</td>\n",
       "      <td>Technology</td>\n",
       "      <td>Accessories</td>\n",
       "      <td>Logitech G602 Wireless Gaming Mouse</td>\n",
       "      <td>159.980</td>\n",
       "      <td>2</td>\n",
       "      <td>0.0</td>\n",
       "      <td>57.5928</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7519</th>\n",
       "      <td>US-2017-167920</td>\n",
       "      <td>2017-12-09</td>\n",
       "      <td>2017-12-12</td>\n",
       "      <td>Second Class</td>\n",
       "      <td>JL-15835</td>\n",
       "      <td>John Lee</td>\n",
       "      <td>Consumer</td>\n",
       "      <td>Richmond</td>\n",
       "      <td>Kentucky</td>\n",
       "      <td>40475</td>\n",
       "      <td>South</td>\n",
       "      <td>OFF-LA-10004409</td>\n",
       "      <td>Office Supplies</td>\n",
       "      <td>Labels</td>\n",
       "      <td>Avery 492</td>\n",
       "      <td>5.760</td>\n",
       "      <td>2</td>\n",
       "      <td>0.0</td>\n",
       "      <td>2.6496</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "            Order ID Order Date  Ship Date       Ship Mode Customer ID  \\\n",
       "5325  CA-2015-119508 2015-12-04 2015-12-09  Standard Class    TZ-21580   \n",
       "8671  US-2014-159611 2014-12-27 2015-01-01    Second Class    KB-16315   \n",
       "8945  CA-2014-128622 2014-11-15 2014-11-17    Second Class    SC-20260   \n",
       "632   US-2016-135720 2016-12-11 2016-12-13    Second Class    FM-14380   \n",
       "4038  CA-2014-110786 2014-12-29 2015-01-02  Standard Class    AJ-10795   \n",
       "9778  CA-2014-169019 2014-07-26 2014-07-30  Standard Class    LF-17185   \n",
       "2433  US-2017-112613 2017-05-28 2017-06-01  Standard Class    JH-15910   \n",
       "5907  US-2016-113985 2016-12-02 2016-12-07  Standard Class    KD-16495   \n",
       "483   CA-2014-127691 2014-07-22 2014-07-27  Standard Class    EM-14065   \n",
       "7519  US-2017-167920 2017-12-09 2017-12-12    Second Class    JL-15835   \n",
       "\n",
       "        Customer Name    Segment           City       State  Postal Code  \\\n",
       "5325        Tracy Zic   Consumer       Lakewood  California        90712   \n",
       "8671       Karl Braun   Consumer      Cleveland        Ohio        44105   \n",
       "8945      Scott Cohen  Corporate  San Francisco  California        94110   \n",
       "632       Fred McMath   Consumer         Aurora    Colorado        80013   \n",
       "4038  Anthony Johnson  Corporate  San Francisco  California        94110   \n",
       "9778      Luke Foster   Consumer    San Antonio       Texas        78207   \n",
       "2433  Jonathan Howell   Consumer        Houston       Texas        77070   \n",
       "5907    Keith Dawkins  Corporate       San Jose  California        95123   \n",
       "483         Erin Mull   Consumer  New York City    New York        10024   \n",
       "7519         John Lee   Consumer       Richmond    Kentucky        40475   \n",
       "\n",
       "       Region       Product ID         Category Sub-Category  \\\n",
       "5325     West  FUR-FU-10004270        Furniture  Furnishings   \n",
       "8671     East  OFF-ST-10002790  Office Supplies      Storage   \n",
       "8945     West  OFF-SU-10001574  Office Supplies     Supplies   \n",
       "632      West  TEC-PH-10002103       Technology       Phones   \n",
       "4038     West  OFF-PA-10000528  Office Supplies        Paper   \n",
       "9778  Central  OFF-BI-10001679  Office Supplies      Binders   \n",
       "2433  Central  TEC-PH-10001536       Technology       Phones   \n",
       "5907     West  OFF-BI-10002353  Office Supplies      Binders   \n",
       "483      East  TEC-AC-10002567       Technology  Accessories   \n",
       "7519    South  OFF-LA-10004409  Office Supplies       Labels   \n",
       "\n",
       "                                       Product Name    Sales  Quantity  \\\n",
       "5325  Eldon Image Series Desk Accessories, Burgundy   25.080         6   \n",
       "8671                      Safco Industrial Shelving  118.160         2   \n",
       "8945                       Acme Value Line Scissors   10.950         3   \n",
       "632                                 Jabra SPEAK 410  300.768         4   \n",
       "4038                                     Xerox 1981   21.120         4   \n",
       "9778   GBC Instant Index System for Binding Systems    8.880         5   \n",
       "2433           Spigen Samsung Galaxy S5 Case Wallet   54.368         4   \n",
       "5907                        GBC VeloBind Cover Sets   24.704         2   \n",
       "483             Logitech G602 Wireless Gaming Mouse  159.980         2   \n",
       "7519                                      Avery 492    5.760         2   \n",
       "\n",
       "      Discount   Profit  \n",
       "5325       0.0   9.0288  \n",
       "8671       0.2 -25.1090  \n",
       "8945       0.0   3.2850  \n",
       "632        0.2  30.0768  \n",
       "4038       0.0   9.5040  \n",
       "9778       0.8 -13.3200  \n",
       "2433       0.2   4.0776  \n",
       "5907       0.2   9.2640  \n",
       "483        0.0  57.5928  \n",
       "7519       0.0   2.6496  "
      ]
     },
     "execution_count": 69,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.sample(frac=0.001,replace=True)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Exercise 16: Pandas `value_count` method to return unique records"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 70,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "William Brown          37\n",
       "John Lee               34\n",
       "Matt Abelman           34\n",
       "Paul Prost             34\n",
       "Chloris Kastensmidt    32\n",
       "Edward Hooks           32\n",
       "Seth Vernon            32\n",
       "Jonathan Doherty       32\n",
       "Arthur Prichep         31\n",
       "Emily Phan             31\n",
       "Name: Customer Name, dtype: int64"
      ]
     },
     "execution_count": 70,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df['Customer Name'].value_counts()[:10]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Exercise 17: Pivot table functionality - `pivot_table`"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 71,
   "metadata": {},
   "outputs": [],
   "source": [
    "df_sample=df.sample(n=100)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 72,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th>Profit</th>\n",
       "      <th>Quantity</th>\n",
       "      <th>Sales</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Region</th>\n",
       "      <th>State</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th rowspan=\"6\" valign=\"top\">Central</th>\n",
       "      <th>Illinois</th>\n",
       "      <td>-6.779400</td>\n",
       "      <td>3.300000</td>\n",
       "      <td>96.905900</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Indiana</th>\n",
       "      <td>21.013533</td>\n",
       "      <td>4.000000</td>\n",
       "      <td>64.860000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Michigan</th>\n",
       "      <td>70.006000</td>\n",
       "      <td>2.000000</td>\n",
       "      <td>411.800000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Minnesota</th>\n",
       "      <td>275.478000</td>\n",
       "      <td>5.000000</td>\n",
       "      <td>655.900000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Texas</th>\n",
       "      <td>-2.045429</td>\n",
       "      <td>3.142857</td>\n",
       "      <td>21.738571</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Wisconsin</th>\n",
       "      <td>82.077600</td>\n",
       "      <td>6.000000</td>\n",
       "      <td>373.080000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"8\" valign=\"top\">East</th>\n",
       "      <th>Connecticut</th>\n",
       "      <td>2.006400</td>\n",
       "      <td>2.000000</td>\n",
       "      <td>4.560000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Delaware</th>\n",
       "      <td>79.891200</td>\n",
       "      <td>3.000000</td>\n",
       "      <td>166.440000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Massachusetts</th>\n",
       "      <td>35.864400</td>\n",
       "      <td>3.000000</td>\n",
       "      <td>137.940000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>New Hampshire</th>\n",
       "      <td>17.220000</td>\n",
       "      <td>3.000000</td>\n",
       "      <td>34.440000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>New Jersey</th>\n",
       "      <td>34.940200</td>\n",
       "      <td>3.000000</td>\n",
       "      <td>286.070000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>New York</th>\n",
       "      <td>49.986046</td>\n",
       "      <td>2.692308</td>\n",
       "      <td>214.492000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Ohio</th>\n",
       "      <td>-8.391667</td>\n",
       "      <td>5.333333</td>\n",
       "      <td>23.725333</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Pennsylvania</th>\n",
       "      <td>-175.385833</td>\n",
       "      <td>3.666667</td>\n",
       "      <td>791.396333</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"5\" valign=\"top\">South</th>\n",
       "      <th>Florida</th>\n",
       "      <td>-6.142120</td>\n",
       "      <td>2.400000</td>\n",
       "      <td>37.438000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Georgia</th>\n",
       "      <td>53.423300</td>\n",
       "      <td>4.142857</td>\n",
       "      <td>162.612857</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Mississippi</th>\n",
       "      <td>78.672000</td>\n",
       "      <td>2.000000</td>\n",
       "      <td>262.240000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>North Carolina</th>\n",
       "      <td>3.500400</td>\n",
       "      <td>2.500000</td>\n",
       "      <td>27.708000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Tennessee</th>\n",
       "      <td>2.640200</td>\n",
       "      <td>3.333333</td>\n",
       "      <td>33.323000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"6\" valign=\"top\">West</th>\n",
       "      <th>Arizona</th>\n",
       "      <td>-42.411600</td>\n",
       "      <td>3.000000</td>\n",
       "      <td>242.352000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>California</th>\n",
       "      <td>48.919118</td>\n",
       "      <td>3.818182</td>\n",
       "      <td>377.261818</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Idaho</th>\n",
       "      <td>3.354400</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>9.584000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Nevada</th>\n",
       "      <td>-109.582200</td>\n",
       "      <td>3.000000</td>\n",
       "      <td>674.352000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Oregon</th>\n",
       "      <td>23.028000</td>\n",
       "      <td>3.000000</td>\n",
       "      <td>230.280000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Washington</th>\n",
       "      <td>2.075280</td>\n",
       "      <td>6.000000</td>\n",
       "      <td>162.782400</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                            Profit  Quantity       Sales\n",
       "Region  State                                           \n",
       "Central Illinois         -6.779400  3.300000   96.905900\n",
       "        Indiana          21.013533  4.000000   64.860000\n",
       "        Michigan         70.006000  2.000000  411.800000\n",
       "        Minnesota       275.478000  5.000000  655.900000\n",
       "        Texas            -2.045429  3.142857   21.738571\n",
       "        Wisconsin        82.077600  6.000000  373.080000\n",
       "East    Connecticut       2.006400  2.000000    4.560000\n",
       "        Delaware         79.891200  3.000000  166.440000\n",
       "        Massachusetts    35.864400  3.000000  137.940000\n",
       "        New Hampshire    17.220000  3.000000   34.440000\n",
       "        New Jersey       34.940200  3.000000  286.070000\n",
       "        New York         49.986046  2.692308  214.492000\n",
       "        Ohio             -8.391667  5.333333   23.725333\n",
       "        Pennsylvania   -175.385833  3.666667  791.396333\n",
       "South   Florida          -6.142120  2.400000   37.438000\n",
       "        Georgia          53.423300  4.142857  162.612857\n",
       "        Mississippi      78.672000  2.000000  262.240000\n",
       "        North Carolina    3.500400  2.500000   27.708000\n",
       "        Tennessee         2.640200  3.333333   33.323000\n",
       "West    Arizona         -42.411600  3.000000  242.352000\n",
       "        California       48.919118  3.818182  377.261818\n",
       "        Idaho             3.354400  1.000000    9.584000\n",
       "        Nevada         -109.582200  3.000000  674.352000\n",
       "        Oregon           23.028000  3.000000  230.280000\n",
       "        Washington        2.075280  6.000000  162.782400"
      ]
     },
     "execution_count": 72,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_sample.pivot_table(values=['Sales','Quantity','Profit'],index=['Region','State'],aggfunc='mean')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Exercise 18: Sorting by particular column"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 73,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Customer Name</th>\n",
       "      <th>State</th>\n",
       "      <th>Sales</th>\n",
       "      <th>Quantity</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>9457</th>\n",
       "      <td>Bart Watters</td>\n",
       "      <td>North Carolina</td>\n",
       "      <td>12.060</td>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5800</th>\n",
       "      <td>Michael Nguyen</td>\n",
       "      <td>Texas</td>\n",
       "      <td>575.968</td>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4940</th>\n",
       "      <td>Ivan Liston</td>\n",
       "      <td>Delaware</td>\n",
       "      <td>1268.820</td>\n",
       "      <td>9</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7807</th>\n",
       "      <td>Vivek Sundaresam</td>\n",
       "      <td>North Carolina</td>\n",
       "      <td>30.828</td>\n",
       "      <td>7</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3109</th>\n",
       "      <td>Andrew Allen</td>\n",
       "      <td>Missouri</td>\n",
       "      <td>7.640</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8243</th>\n",
       "      <td>Ben Wallace</td>\n",
       "      <td>California</td>\n",
       "      <td>806.336</td>\n",
       "      <td>8</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3873</th>\n",
       "      <td>Darrin Martin</td>\n",
       "      <td>Tennessee</td>\n",
       "      <td>23.472</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5281</th>\n",
       "      <td>Sung Pak</td>\n",
       "      <td>Minnesota</td>\n",
       "      <td>6.160</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7752</th>\n",
       "      <td>Keith Herrera</td>\n",
       "      <td>Virginia</td>\n",
       "      <td>33.020</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>482</th>\n",
       "      <td>Erin Mull</td>\n",
       "      <td>New York</td>\n",
       "      <td>5.960</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>602</th>\n",
       "      <td>Alan Shonely</td>\n",
       "      <td>Florida</td>\n",
       "      <td>142.776</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9768</th>\n",
       "      <td>Tracy Collins</td>\n",
       "      <td>California</td>\n",
       "      <td>50.040</td>\n",
       "      <td>6</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1695</th>\n",
       "      <td>Phillina Ober</td>\n",
       "      <td>New Jersey</td>\n",
       "      <td>14.460</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5510</th>\n",
       "      <td>Joy Daniels</td>\n",
       "      <td>Illinois</td>\n",
       "      <td>11.120</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1553</th>\n",
       "      <td>Bobby Odegard</td>\n",
       "      <td>Kentucky</td>\n",
       "      <td>124.750</td>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "         Customer Name           State     Sales  Quantity\n",
       "9457      Bart Watters  North Carolina    12.060         5\n",
       "5800    Michael Nguyen           Texas   575.968         4\n",
       "4940       Ivan Liston        Delaware  1268.820         9\n",
       "7807  Vivek Sundaresam  North Carolina    30.828         7\n",
       "3109      Andrew Allen        Missouri     7.640         1\n",
       "8243       Ben Wallace      California   806.336         8\n",
       "3873     Darrin Martin       Tennessee    23.472         3\n",
       "5281          Sung Pak       Minnesota     6.160         2\n",
       "7752     Keith Herrera        Virginia    33.020         2\n",
       "482          Erin Mull        New York     5.960         2\n",
       "602       Alan Shonely         Florida   142.776         1\n",
       "9768     Tracy Collins      California    50.040         6\n",
       "1695     Phillina Ober      New Jersey    14.460         3\n",
       "5510       Joy Daniels        Illinois    11.120         2\n",
       "1553     Bobby Odegard        Kentucky   124.750         5"
      ]
     },
     "execution_count": 73,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_sample=df[['Customer Name','State','Sales','Quantity']].sample(n=15)\n",
    "df_sample"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 74,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Customer Name</th>\n",
       "      <th>State</th>\n",
       "      <th>Sales</th>\n",
       "      <th>Quantity</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>482</th>\n",
       "      <td>Erin Mull</td>\n",
       "      <td>New York</td>\n",
       "      <td>5.960</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5281</th>\n",
       "      <td>Sung Pak</td>\n",
       "      <td>Minnesota</td>\n",
       "      <td>6.160</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3109</th>\n",
       "      <td>Andrew Allen</td>\n",
       "      <td>Missouri</td>\n",
       "      <td>7.640</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5510</th>\n",
       "      <td>Joy Daniels</td>\n",
       "      <td>Illinois</td>\n",
       "      <td>11.120</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9457</th>\n",
       "      <td>Bart Watters</td>\n",
       "      <td>North Carolina</td>\n",
       "      <td>12.060</td>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1695</th>\n",
       "      <td>Phillina Ober</td>\n",
       "      <td>New Jersey</td>\n",
       "      <td>14.460</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3873</th>\n",
       "      <td>Darrin Martin</td>\n",
       "      <td>Tennessee</td>\n",
       "      <td>23.472</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7807</th>\n",
       "      <td>Vivek Sundaresam</td>\n",
       "      <td>North Carolina</td>\n",
       "      <td>30.828</td>\n",
       "      <td>7</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7752</th>\n",
       "      <td>Keith Herrera</td>\n",
       "      <td>Virginia</td>\n",
       "      <td>33.020</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9768</th>\n",
       "      <td>Tracy Collins</td>\n",
       "      <td>California</td>\n",
       "      <td>50.040</td>\n",
       "      <td>6</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1553</th>\n",
       "      <td>Bobby Odegard</td>\n",
       "      <td>Kentucky</td>\n",
       "      <td>124.750</td>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>602</th>\n",
       "      <td>Alan Shonely</td>\n",
       "      <td>Florida</td>\n",
       "      <td>142.776</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5800</th>\n",
       "      <td>Michael Nguyen</td>\n",
       "      <td>Texas</td>\n",
       "      <td>575.968</td>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8243</th>\n",
       "      <td>Ben Wallace</td>\n",
       "      <td>California</td>\n",
       "      <td>806.336</td>\n",
       "      <td>8</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4940</th>\n",
       "      <td>Ivan Liston</td>\n",
       "      <td>Delaware</td>\n",
       "      <td>1268.820</td>\n",
       "      <td>9</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "         Customer Name           State     Sales  Quantity\n",
       "482          Erin Mull        New York     5.960         2\n",
       "5281          Sung Pak       Minnesota     6.160         2\n",
       "3109      Andrew Allen        Missouri     7.640         1\n",
       "5510       Joy Daniels        Illinois    11.120         2\n",
       "9457      Bart Watters  North Carolina    12.060         5\n",
       "1695     Phillina Ober      New Jersey    14.460         3\n",
       "3873     Darrin Martin       Tennessee    23.472         3\n",
       "7807  Vivek Sundaresam  North Carolina    30.828         7\n",
       "7752     Keith Herrera        Virginia    33.020         2\n",
       "9768     Tracy Collins      California    50.040         6\n",
       "1553     Bobby Odegard        Kentucky   124.750         5\n",
       "602       Alan Shonely         Florida   142.776         1\n",
       "5800    Michael Nguyen           Texas   575.968         4\n",
       "8243       Ben Wallace      California   806.336         8\n",
       "4940       Ivan Liston        Delaware  1268.820         9"
      ]
     },
     "execution_count": 74,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_sample.sort_values(by='Sales')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 75,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Customer Name</th>\n",
       "      <th>State</th>\n",
       "      <th>Sales</th>\n",
       "      <th>Quantity</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>9768</th>\n",
       "      <td>Tracy Collins</td>\n",
       "      <td>California</td>\n",
       "      <td>50.040</td>\n",
       "      <td>6</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8243</th>\n",
       "      <td>Ben Wallace</td>\n",
       "      <td>California</td>\n",
       "      <td>806.336</td>\n",
       "      <td>8</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4940</th>\n",
       "      <td>Ivan Liston</td>\n",
       "      <td>Delaware</td>\n",
       "      <td>1268.820</td>\n",
       "      <td>9</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>602</th>\n",
       "      <td>Alan Shonely</td>\n",
       "      <td>Florida</td>\n",
       "      <td>142.776</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5510</th>\n",
       "      <td>Joy Daniels</td>\n",
       "      <td>Illinois</td>\n",
       "      <td>11.120</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1553</th>\n",
       "      <td>Bobby Odegard</td>\n",
       "      <td>Kentucky</td>\n",
       "      <td>124.750</td>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5281</th>\n",
       "      <td>Sung Pak</td>\n",
       "      <td>Minnesota</td>\n",
       "      <td>6.160</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3109</th>\n",
       "      <td>Andrew Allen</td>\n",
       "      <td>Missouri</td>\n",
       "      <td>7.640</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1695</th>\n",
       "      <td>Phillina Ober</td>\n",
       "      <td>New Jersey</td>\n",
       "      <td>14.460</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>482</th>\n",
       "      <td>Erin Mull</td>\n",
       "      <td>New York</td>\n",
       "      <td>5.960</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9457</th>\n",
       "      <td>Bart Watters</td>\n",
       "      <td>North Carolina</td>\n",
       "      <td>12.060</td>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7807</th>\n",
       "      <td>Vivek Sundaresam</td>\n",
       "      <td>North Carolina</td>\n",
       "      <td>30.828</td>\n",
       "      <td>7</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3873</th>\n",
       "      <td>Darrin Martin</td>\n",
       "      <td>Tennessee</td>\n",
       "      <td>23.472</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5800</th>\n",
       "      <td>Michael Nguyen</td>\n",
       "      <td>Texas</td>\n",
       "      <td>575.968</td>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7752</th>\n",
       "      <td>Keith Herrera</td>\n",
       "      <td>Virginia</td>\n",
       "      <td>33.020</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "         Customer Name           State     Sales  Quantity\n",
       "9768     Tracy Collins      California    50.040         6\n",
       "8243       Ben Wallace      California   806.336         8\n",
       "4940       Ivan Liston        Delaware  1268.820         9\n",
       "602       Alan Shonely         Florida   142.776         1\n",
       "5510       Joy Daniels        Illinois    11.120         2\n",
       "1553     Bobby Odegard        Kentucky   124.750         5\n",
       "5281          Sung Pak       Minnesota     6.160         2\n",
       "3109      Andrew Allen        Missouri     7.640         1\n",
       "1695     Phillina Ober      New Jersey    14.460         3\n",
       "482          Erin Mull        New York     5.960         2\n",
       "9457      Bart Watters  North Carolina    12.060         5\n",
       "7807  Vivek Sundaresam  North Carolina    30.828         7\n",
       "3873     Darrin Martin       Tennessee    23.472         3\n",
       "5800    Michael Nguyen           Texas   575.968         4\n",
       "7752     Keith Herrera        Virginia    33.020         2"
      ]
     },
     "execution_count": 75,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_sample.sort_values(by=['State','Sales'])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Exercise 19: Flexibility for user-defined function with `apply` method"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 76,
   "metadata": {},
   "outputs": [],
   "source": [
    "def categorize_sales(price):\n",
    "    if price < 50:\n",
    "        return \"Low\"\n",
    "    elif price < 200:\n",
    "        return \"Medium\"\n",
    "    else:\n",
    "        return \"High\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 77,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Customer Name</th>\n",
       "      <th>State</th>\n",
       "      <th>Sales</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>4382</th>\n",
       "      <td>Russell Applegate</td>\n",
       "      <td>Illinois</td>\n",
       "      <td>2.880</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1533</th>\n",
       "      <td>Ellis Ballard</td>\n",
       "      <td>New York</td>\n",
       "      <td>212.880</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6268</th>\n",
       "      <td>Robert Marley</td>\n",
       "      <td>Massachusetts</td>\n",
       "      <td>22.450</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>401</th>\n",
       "      <td>Anna Gayman</td>\n",
       "      <td>Texas</td>\n",
       "      <td>57.584</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5668</th>\n",
       "      <td>Sanjit Engle</td>\n",
       "      <td>Washington</td>\n",
       "      <td>53.984</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9035</th>\n",
       "      <td>Eric Murdock</td>\n",
       "      <td>Illinois</td>\n",
       "      <td>108.768</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>240</th>\n",
       "      <td>Ken Lonsdale</td>\n",
       "      <td>Illinois</td>\n",
       "      <td>31.984</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2279</th>\n",
       "      <td>Jim Karlsson</td>\n",
       "      <td>Washington</td>\n",
       "      <td>97.568</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6823</th>\n",
       "      <td>Yana Sorensen</td>\n",
       "      <td>Alabama</td>\n",
       "      <td>98.460</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9488</th>\n",
       "      <td>Eric Murdock</td>\n",
       "      <td>Oregon</td>\n",
       "      <td>572.800</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "          Customer Name          State    Sales\n",
       "4382  Russell Applegate       Illinois    2.880\n",
       "1533      Ellis Ballard       New York  212.880\n",
       "6268      Robert Marley  Massachusetts   22.450\n",
       "401         Anna Gayman          Texas   57.584\n",
       "5668       Sanjit Engle     Washington   53.984\n",
       "9035       Eric Murdock       Illinois  108.768\n",
       "240        Ken Lonsdale       Illinois   31.984\n",
       "2279       Jim Karlsson     Washington   97.568\n",
       "6823      Yana Sorensen        Alabama   98.460\n",
       "9488       Eric Murdock         Oregon  572.800"
      ]
     },
     "execution_count": 77,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Fixed seed: without random_state every run draws different rows, so the sample (and everything derived from it) cannot be reproduced.\n",
    "df_sample = df[['Customer Name', 'State', 'Sales']].sample(n=100, random_state=42)\n",
    "df_sample.head(10)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 78,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Customer Name</th>\n",
       "      <th>State</th>\n",
       "      <th>Sales</th>\n",
       "      <th>Sales Price Category</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>4382</th>\n",
       "      <td>Russell Applegate</td>\n",
       "      <td>Illinois</td>\n",
       "      <td>2.880</td>\n",
       "      <td>Low</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1533</th>\n",
       "      <td>Ellis Ballard</td>\n",
       "      <td>New York</td>\n",
       "      <td>212.880</td>\n",
       "      <td>High</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6268</th>\n",
       "      <td>Robert Marley</td>\n",
       "      <td>Massachusetts</td>\n",
       "      <td>22.450</td>\n",
       "      <td>Low</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>401</th>\n",
       "      <td>Anna Gayman</td>\n",
       "      <td>Texas</td>\n",
       "      <td>57.584</td>\n",
       "      <td>Medium</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5668</th>\n",
       "      <td>Sanjit Engle</td>\n",
       "      <td>Washington</td>\n",
       "      <td>53.984</td>\n",
       "      <td>Medium</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9035</th>\n",
       "      <td>Eric Murdock</td>\n",
       "      <td>Illinois</td>\n",
       "      <td>108.768</td>\n",
       "      <td>Medium</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>240</th>\n",
       "      <td>Ken Lonsdale</td>\n",
       "      <td>Illinois</td>\n",
       "      <td>31.984</td>\n",
       "      <td>Low</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2279</th>\n",
       "      <td>Jim Karlsson</td>\n",
       "      <td>Washington</td>\n",
       "      <td>97.568</td>\n",
       "      <td>Medium</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6823</th>\n",
       "      <td>Yana Sorensen</td>\n",
       "      <td>Alabama</td>\n",
       "      <td>98.460</td>\n",
       "      <td>Medium</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9488</th>\n",
       "      <td>Eric Murdock</td>\n",
       "      <td>Oregon</td>\n",
       "      <td>572.800</td>\n",
       "      <td>High</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "          Customer Name          State    Sales Sales Price Category\n",
       "4382  Russell Applegate       Illinois    2.880                  Low\n",
       "1533      Ellis Ballard       New York  212.880                 High\n",
       "6268      Robert Marley  Massachusetts   22.450                  Low\n",
       "401         Anna Gayman          Texas   57.584               Medium\n",
       "5668       Sanjit Engle     Washington   53.984               Medium\n",
       "9035       Eric Murdock       Illinois  108.768               Medium\n",
       "240        Ken Lonsdale       Illinois   31.984                  Low\n",
       "2279       Jim Karlsson     Washington   97.568               Medium\n",
       "6823      Yana Sorensen        Alabama   98.460               Medium\n",
       "9488       Eric Murdock         Oregon  572.800                 High"
      ]
     },
     "execution_count": 78,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Derive a label for every sampled row by passing its 'Sales' value through categorize_sales\n",
    "# (defined outside this cell; presumably earlier in the notebook).\n",
    "df_sample['Sales Price Category'] = df_sample['Sales'].apply(categorize_sales)\n",
    "df_sample.head(n=10)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 79,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Customer Name</th>\n",
       "      <th>State</th>\n",
       "      <th>Sales</th>\n",
       "      <th>Sales Price Category</th>\n",
       "      <th>Customer Name Length</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>4382</th>\n",
       "      <td>Russell Applegate</td>\n",
       "      <td>Illinois</td>\n",
       "      <td>2.880</td>\n",
       "      <td>Low</td>\n",
       "      <td>17</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1533</th>\n",
       "      <td>Ellis Ballard</td>\n",
       "      <td>New York</td>\n",
       "      <td>212.880</td>\n",
       "      <td>High</td>\n",
       "      <td>13</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6268</th>\n",
       "      <td>Robert Marley</td>\n",
       "      <td>Massachusetts</td>\n",
       "      <td>22.450</td>\n",
       "      <td>Low</td>\n",
       "      <td>13</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>401</th>\n",
       "      <td>Anna Gayman</td>\n",
       "      <td>Texas</td>\n",
       "      <td>57.584</td>\n",
       "      <td>Medium</td>\n",
       "      <td>11</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5668</th>\n",
       "      <td>Sanjit Engle</td>\n",
       "      <td>Washington</td>\n",
       "      <td>53.984</td>\n",
       "      <td>Medium</td>\n",
       "      <td>12</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9035</th>\n",
       "      <td>Eric Murdock</td>\n",
       "      <td>Illinois</td>\n",
       "      <td>108.768</td>\n",
       "      <td>Medium</td>\n",
       "      <td>12</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>240</th>\n",
       "      <td>Ken Lonsdale</td>\n",
       "      <td>Illinois</td>\n",
       "      <td>31.984</td>\n",
       "      <td>Low</td>\n",
       "      <td>12</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2279</th>\n",
       "      <td>Jim Karlsson</td>\n",
       "      <td>Washington</td>\n",
       "      <td>97.568</td>\n",
       "      <td>Medium</td>\n",
       "      <td>12</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6823</th>\n",
       "      <td>Yana Sorensen</td>\n",
       "      <td>Alabama</td>\n",
       "      <td>98.460</td>\n",
       "      <td>Medium</td>\n",
       "      <td>13</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9488</th>\n",
       "      <td>Eric Murdock</td>\n",
       "      <td>Oregon</td>\n",
       "      <td>572.800</td>\n",
       "      <td>High</td>\n",
       "      <td>12</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "          Customer Name          State    Sales Sales Price Category  \\\n",
       "4382  Russell Applegate       Illinois    2.880                  Low   \n",
       "1533      Ellis Ballard       New York  212.880                 High   \n",
       "6268      Robert Marley  Massachusetts   22.450                  Low   \n",
       "401         Anna Gayman          Texas   57.584               Medium   \n",
       "5668       Sanjit Engle     Washington   53.984               Medium   \n",
       "9035       Eric Murdock       Illinois  108.768               Medium   \n",
       "240        Ken Lonsdale       Illinois   31.984                  Low   \n",
       "2279       Jim Karlsson     Washington   97.568               Medium   \n",
       "6823      Yana Sorensen        Alabama   98.460               Medium   \n",
       "9488       Eric Murdock         Oregon  572.800                 High   \n",
       "\n",
       "      Customer Name Length  \n",
       "4382                    17  \n",
       "1533                    13  \n",
       "6268                    13  \n",
       "401                     11  \n",
       "5668                    12  \n",
       "9035                    12  \n",
       "240                     12  \n",
       "2279                    12  \n",
       "6823                    13  \n",
       "9488                    12  "
      ]
     },
     "execution_count": 79,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Vectorised string accessor: same lengths as apply(len) for string names,\n",
    "# but a missing name yields NaN instead of raising TypeError.\n",
    "df_sample['Customer Name Length'] = df_sample['Customer Name'].str.len()\n",
    "df_sample.head(10)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 80,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Customer Name</th>\n",
       "      <th>State</th>\n",
       "      <th>Sales</th>\n",
       "      <th>Sales Price Category</th>\n",
       "      <th>Customer Name Length</th>\n",
       "      <th>Discounted Price</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>4382</th>\n",
       "      <td>Russell Applegate</td>\n",
       "      <td>Illinois</td>\n",
       "      <td>2.880</td>\n",
       "      <td>Low</td>\n",
       "      <td>17</td>\n",
       "      <td>2.880</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1533</th>\n",
       "      <td>Ellis Ballard</td>\n",
       "      <td>New York</td>\n",
       "      <td>212.880</td>\n",
       "      <td>High</td>\n",
       "      <td>13</td>\n",
       "      <td>180.948</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6268</th>\n",
       "      <td>Robert Marley</td>\n",
       "      <td>Massachusetts</td>\n",
       "      <td>22.450</td>\n",
       "      <td>Low</td>\n",
       "      <td>13</td>\n",
       "      <td>22.450</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>401</th>\n",
       "      <td>Anna Gayman</td>\n",
       "      <td>Texas</td>\n",
       "      <td>57.584</td>\n",
       "      <td>Medium</td>\n",
       "      <td>11</td>\n",
       "      <td>57.584</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5668</th>\n",
       "      <td>Sanjit Engle</td>\n",
       "      <td>Washington</td>\n",
       "      <td>53.984</td>\n",
       "      <td>Medium</td>\n",
       "      <td>12</td>\n",
       "      <td>53.984</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9035</th>\n",
       "      <td>Eric Murdock</td>\n",
       "      <td>Illinois</td>\n",
       "      <td>108.768</td>\n",
       "      <td>Medium</td>\n",
       "      <td>12</td>\n",
       "      <td>108.768</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>240</th>\n",
       "      <td>Ken Lonsdale</td>\n",
       "      <td>Illinois</td>\n",
       "      <td>31.984</td>\n",
       "      <td>Low</td>\n",
       "      <td>12</td>\n",
       "      <td>31.984</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2279</th>\n",
       "      <td>Jim Karlsson</td>\n",
       "      <td>Washington</td>\n",
       "      <td>97.568</td>\n",
       "      <td>Medium</td>\n",
       "      <td>12</td>\n",
       "      <td>97.568</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6823</th>\n",
       "      <td>Yana Sorensen</td>\n",
       "      <td>Alabama</td>\n",
       "      <td>98.460</td>\n",
       "      <td>Medium</td>\n",
       "      <td>13</td>\n",
       "      <td>98.460</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9488</th>\n",
       "      <td>Eric Murdock</td>\n",
       "      <td>Oregon</td>\n",
       "      <td>572.800</td>\n",
       "      <td>High</td>\n",
       "      <td>12</td>\n",
       "      <td>486.880</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "          Customer Name          State    Sales Sales Price Category  \\\n",
       "4382  Russell Applegate       Illinois    2.880                  Low   \n",
       "1533      Ellis Ballard       New York  212.880                 High   \n",
       "6268      Robert Marley  Massachusetts   22.450                  Low   \n",
       "401         Anna Gayman          Texas   57.584               Medium   \n",
       "5668       Sanjit Engle     Washington   53.984               Medium   \n",
       "9035       Eric Murdock       Illinois  108.768               Medium   \n",
       "240        Ken Lonsdale       Illinois   31.984                  Low   \n",
       "2279       Jim Karlsson     Washington   97.568               Medium   \n",
       "6823      Yana Sorensen        Alabama   98.460               Medium   \n",
       "9488       Eric Murdock         Oregon  572.800                 High   \n",
       "\n",
       "      Customer Name Length  Discounted Price  \n",
       "4382                    17             2.880  \n",
       "1533                    13           180.948  \n",
       "6268                    13            22.450  \n",
       "401                     11            57.584  \n",
       "5668                    12            53.984  \n",
       "9035                    12           108.768  \n",
       "240                     12            31.984  \n",
       "2279                    12            97.568  \n",
       "6823                    13            98.460  \n",
       "9488                    12           486.880  "
      ]
     },
     "execution_count": 80,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Rows whose 'Sales' value exceeds 200 are replaced by 85% of that value; every other row keeps its original amount.\n",
    "df_sample['Discounted Price'] = df_sample['Sales'].mask(df_sample['Sales'] > 200, 0.85 * df_sample['Sales'])\n",
    "df_sample.head(n=10)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.8"
  },
  "latex_envs": {
   "LaTeX_envs_menu_present": true,
   "autoclose": false,
   "autocomplete": true,
   "bibliofile": "biblio.bib",
   "cite_by": "apalike",
   "current_citInitial": 1,
   "eqLabelWithNumbers": true,
   "eqNumInitial": 1,
   "hotkeys": {
    "equation": "Ctrl-E",
    "itemize": "Ctrl-I"
   },
   "labels_anchors": false,
   "latex_user_defs": false,
   "report_style_numbering": false,
   "user_envs_cfg": false
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
